@swairshah
Created January 8, 2026 19:18
Analyze Claude Code session JSONL files for tool usage statistics
#!/usr/bin/env python3
"""
Analyze tool usage from a Claude Code JSONL session file.
Extracts all tool_use messages and generates statistics.
"""
import json
import re
import shlex
import sys
from collections import Counter
from pathlib import Path

def parse_bash_command(command: str) -> dict:
    """
    Parse a bash command to extract the program(s) being called.
    Returns info about the command structure.
    """
    if not command:
        return {'programs': [], 'raw': ''}

    # Remove comments (lines starting with # or inline comments)
    command = re.sub(r'#.*$', '', command, flags=re.MULTILINE)

    # Split on ||, &&, ;, and | to get the individual commands
    parts = re.split(r'\s*(?:\|\||&&|;|\|)\s*', command)

    programs = []
    for part in parts:
        part = part.strip()
        if not part:
            continue

        # Skip if it starts with a comment
        if part.startswith('#'):
            continue

        # Handle subshells and command substitution
        part = re.sub(r'^\(|\)$', '', part)
        part = re.sub(r'^\$\(|\)$', '', part)

        # Skip variable assignments at the start
        if re.match(r'^[A-Za-z_][A-Za-z0-9_]*=', part):
            # Check if there's a command after the assignment
            match = re.search(r'^[A-Za-z_][A-Za-z0-9_]*=\S+\s+(.+)', part)
            if match:
                part = match.group(1)
            else:
                continue

        # Handle env/sudo/time prefixes
        prefixes = ['env', 'sudo', 'time', 'nice', 'nohup', 'xargs']
        while True:
            first_word = part.split()[0] if part.split() else ''
            if first_word in prefixes:
                rest = part.split(maxsplit=1)
                part = rest[1] if len(rest) > 1 else ''
            else:
                break

        if not part:
            continue

        # Get the actual program name
        try:
            tokens = shlex.split(part)
            if tokens:
                prog = tokens[0]
                # Handle path-qualified commands
                prog = prog.split('/')[-1]
                # Filter out things that don't look like program names
                if prog and re.match(r'^[a-zA-Z][a-zA-Z0-9_.-]*$', prog):
                    programs.append(prog)
        except ValueError:
            # shlex failed, try simple split
            first = part.split()[0] if part.split() else ''
            if first:
                prog = first.split('/')[-1]
                if prog and re.match(r'^[a-zA-Z][a-zA-Z0-9_.-]*$', prog):
                    programs.append(prog)

    return {
        'programs': programs,
        'raw': command[:100],
    }

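# Illustrative example of what the parser above yields (not executed; the result
# shown is worked out by hand from the rules above, not captured output):
#
#   parse_bash_command("FOO=bar sudo git status && cat a.txt | grep -n TODO")
#   # -> {'programs': ['git', 'cat', 'grep'],
#   #     'raw': 'FOO=bar sudo git status && cat a.txt | grep -n TODO'}
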
def extract_tool_calls(jsonl_path: str) -> list[dict]:
    """Extract all tool_use calls from a JSONL file."""
    tool_calls = []
    with open(jsonl_path, 'r') as f:
        for line_num, line in enumerate(f, 1):
            line = line.strip()
            if not line:
                continue
            try:
                entry = json.loads(line)
            except json.JSONDecodeError as e:
                print(f"Warning: Failed to parse line {line_num}: {e}", file=sys.stderr)
                continue

            # Look for message.content which contains tool_use entries
            message = entry.get('message', {})
            content = message.get('content', [])

            # content can be a string or a list
            if not isinstance(content, list):
                continue

            for item in content:
                if isinstance(item, dict) and item.get('type') == 'tool_use':
                    tool_info = {
                        'name': item.get('name'),
                        'id': item.get('id'),
                        'input': item.get('input', {}),
                        'timestamp': entry.get('timestamp'),
                        'uuid': entry.get('uuid'),
                    }
                    # Parse Bash commands to extract programs
                    if item.get('name') == 'Bash':
                        cmd = item.get('input', {}).get('command', '')
                        tool_info['bash_parsed'] = parse_bash_command(cmd)
                    tool_calls.append(tool_info)
    return tool_calls

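# For reference, the entry shape this function expects looks roughly like the
# following (inferred from the fields accessed above; real Claude Code session
# lines carry additional keys that are simply ignored here):
#
#   {"uuid": "...", "timestamp": "...",
#    "message": {"content": [{"type": "tool_use", "id": "...", "name": "Bash",
#                             "input": {"command": "ls", "description": "List files"}}]}}
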
def generate_statistics(tool_calls: list[dict]) -> dict:
    """Generate statistics from tool calls."""
    tool_names = [tc['name'] for tc in tool_calls]
    tool_counts = Counter(tool_names)

    # Extract programs called via Bash
    bash_programs = []
    bash_commands = []
    for tc in tool_calls:
        if tc['name'] == 'Bash' and 'bash_parsed' in tc:
            bash_programs.extend(tc['bash_parsed']['programs'])
            bash_commands.append({
                'command': tc['input'].get('command', ''),
                'description': tc['input'].get('description', ''),
                'programs': tc['bash_parsed']['programs'],
            })
    bash_program_counts = Counter(bash_programs)

    # Extract more details per tool
    tool_details = {}
    for tc in tool_calls:
        name = tc['name']
        if name not in tool_details:
            tool_details[name] = []
        # For Bash, extract the command description if available
        if name == 'Bash':
            desc = tc['input'].get('description', tc['input'].get('command', '')[:50])
            tool_details[name].append(desc)
        elif name in ('Read', 'Write', 'Edit'):
            tool_details[name].append(tc['input'].get('file_path', ''))
        elif name in ('Glob', 'Grep'):
            tool_details[name].append(tc['input'].get('pattern', ''))
        else:
            tool_details[name].append(str(tc['input'])[:80])

    return {
        'total_calls': len(tool_calls),
        'unique_tools': len(tool_counts),
        'tool_counts': dict(tool_counts.most_common()),
        'tool_details': tool_details,
        'bash_program_counts': dict(bash_program_counts.most_common()),
        'bash_commands': bash_commands,
    }

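# Shape of the returned stats dict (illustrative values only):
#
#   {'total_calls': 42, 'unique_tools': 4,
#    'tool_counts': {'Bash': 20, 'Read': 12, 'Edit': 8, 'Grep': 2},
#    'tool_details': {'Bash': ['Run tests', ...], 'Read': ['/path/to/file.py', ...], ...},
#    'bash_program_counts': {'python': 9, 'git': 6, ...},
#    'bash_commands': [{'command': '...', 'description': '...', 'programs': [...]}, ...]}
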
def print_report(stats: dict):
    """Print a formatted report of tool usage statistics."""
    print("=" * 60)
    print("TOOL USAGE STATISTICS")
    print("=" * 60)
    print()
    print(f"Total tool calls: {stats['total_calls']}")
    print(f"Unique tools used: {stats['unique_tools']}")
    print()

    print("-" * 60)
    print("TOOL CALL FREQUENCY (sorted by count)")
    print("-" * 60)
    for tool, count in stats['tool_counts'].items():
        pct = (count / stats['total_calls']) * 100
        bar = "█" * int(pct / 2)
        print(f"{tool:20} {count:4} ({pct:5.1f}%) {bar}")

    # Bash program breakdown
    if stats.get('bash_program_counts'):
        print()
        print("-" * 60)
        print("PROGRAMS CALLED VIA BASH (sorted by count)")
        print("-" * 60)
        total_programs = sum(stats['bash_program_counts'].values())
        for prog, count in stats['bash_program_counts'].items():
            pct = (count / total_programs) * 100
            bar = "▓" * int(pct / 2)
            print(f"{prog:20} {count:4} ({pct:5.1f}%) {bar}")

        print()
        print("-" * 60)
        print("BASH COMMAND DETAILS")
        print("-" * 60)
        for cmd_info in stats['bash_commands'][:15]:  # Show first 15
            progs = ', '.join(cmd_info['programs']) if cmd_info['programs'] else '(none)'
            desc = cmd_info['description'] or cmd_info['command'][:50]
            print(f" [{progs:20}] {desc[:50]}")

    print()
    print("-" * 60)
    print("SAMPLE DETAILS PER TOOL")
    print("-" * 60)
    for tool, details in stats['tool_details'].items():
        print(f"\n{tool} (showing up to 5 samples):")
        for detail in details[:5]:
            if detail:
                print(f" • {detail[:70]}{'...' if len(str(detail)) > 70 else ''}")

def find_jsonl_files(directory: str) -> list[Path]:
    """Recursively find all .jsonl files in a directory."""
    dir_path = Path(directory)
    return sorted(dir_path.rglob("*.jsonl"))

def analyze_single_file(jsonl_path: str) -> dict | None:
    """Analyze a single file and return stats."""
    tool_calls = extract_tool_calls(jsonl_path)
    if not tool_calls:
        return None
    return generate_statistics(tool_calls)

def merge_statistics(all_stats: list[dict]) -> dict:
    """Merge statistics from multiple files into a consolidated summary."""
    merged = {
        'total_calls': 0,
        'unique_tools': set(),
        'tool_counts': Counter(),
        'bash_program_counts': Counter(),
        'bash_commands': [],
        'tool_details': {},
        'file_count': len(all_stats),
        'per_file_stats': [],
    }
    for stats in all_stats:
        merged['total_calls'] += stats['total_calls']
        merged['unique_tools'].update(stats['tool_counts'].keys())
        merged['tool_counts'].update(stats['tool_counts'])
        merged['bash_program_counts'].update(stats.get('bash_program_counts', {}))
        merged['bash_commands'].extend(stats.get('bash_commands', [])[:5])  # Sample from each

        # Merge tool details
        for tool, details in stats.get('tool_details', {}).items():
            if tool not in merged['tool_details']:
                merged['tool_details'][tool] = []
            merged['tool_details'][tool].extend(details[:3])  # Sample from each

    # Convert back to regular types for output
    merged['unique_tools'] = len(merged['unique_tools'])
    merged['tool_counts'] = dict(merged['tool_counts'].most_common())
    merged['bash_program_counts'] = dict(merged['bash_program_counts'].most_common())
    return merged

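# Note: to keep the consolidated report compact, merge_statistics samples rather
# than fully merges the bulky fields: up to 5 bash commands and up to 3 tool-detail
# entries are taken from each file, while the counters are merged in full.
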
def print_consolidated_report(stats: dict, file_stats: list[tuple[str, dict]]):
    """Print a consolidated report across multiple files."""
    print("=" * 70)
    print("CONSOLIDATED TOOL USAGE STATISTICS")
    print("=" * 70)
    print()
    print(f"Files analyzed: {stats['file_count']}")
    print(f"Total tool calls: {stats['total_calls']}")
    print(f"Unique tools used: {stats['unique_tools']}")
    print(f"Average calls per session: {stats['total_calls'] / stats['file_count']:.1f}")
    print()

    # Per-file summary
    print("-" * 70)
    print("PER-SESSION BREAKDOWN")
    print("-" * 70)
    print(f"{'Session ID':<40} {'Calls':>8} {'Tools':>8}")
    print("-" * 70)
    for filepath, fstats in sorted(file_stats, key=lambda x: -x[1]['total_calls'])[:20]:
        session_id = Path(filepath).stem[:36]
        print(f"{session_id:<40} {fstats['total_calls']:>8} {fstats['unique_tools']:>8}")
    if len(file_stats) > 20:
        print(f" ... and {len(file_stats) - 20} more sessions")
    print()

    print("-" * 70)
    print("TOOL CALL FREQUENCY (sorted by count)")
    print("-" * 70)
    max_count = max(stats['tool_counts'].values()) if stats['tool_counts'] else 1
    for tool, count in stats['tool_counts'].items():
        pct = (count / stats['total_calls']) * 100
        bar_len = int((count / max_count) * 40)
        bar = "█" * bar_len
        print(f"{tool:20} {count:6} ({pct:5.1f}%) {bar}")

    # Bash program breakdown
    if stats.get('bash_program_counts'):
        print()
        print("-" * 70)
        print("PROGRAMS CALLED VIA BASH (sorted by count)")
        print("-" * 70)
        total_programs = sum(stats['bash_program_counts'].values())
        max_prog_count = max(stats['bash_program_counts'].values()) if stats['bash_program_counts'] else 1
        # Show top 25 programs
        for i, (prog, count) in enumerate(stats['bash_program_counts'].items()):
            if i >= 25:
                remaining = len(stats['bash_program_counts']) - 25
                print(f" ... and {remaining} more programs")
                break
            pct = (count / total_programs) * 100
            bar_len = int((count / max_prog_count) * 30)
            bar = "▓" * bar_len
            print(f"{prog:20} {count:6} ({pct:5.1f}%) {bar}")

    print()
    print("-" * 70)
    print("SAMPLE DETAILS PER TOOL (from all sessions)")
    print("-" * 70)
    for tool, details in stats['tool_details'].items():
        # Deduplicate and show samples
        unique_details = list(dict.fromkeys(d for d in details if d))[:5]
        print(f"\n{tool} ({stats['tool_counts'].get(tool, 0)} total calls, showing samples):")
        for detail in unique_details:
            if detail:
                print(f" • {str(detail)[:70]}{'...' if len(str(detail)) > 70 else ''}")

def main():
    if len(sys.argv) < 2:
        print("Usage: python analyze_tools.py <jsonl_file_or_directory>")
        print()
        print(" Single file: python analyze_tools.py session.jsonl")
        print(" Directory: python analyze_tools.py /path/to/sessions/")
        sys.exit(1)

    target_path = Path(sys.argv[1])
    if not target_path.exists():
        print(f"Error: Path not found: {target_path}")
        sys.exit(1)

    # Check if it's a directory or single file
    if target_path.is_dir():
        # Recursive directory mode
        print(f"Scanning directory: {target_path}")
        jsonl_files = find_jsonl_files(target_path)
        if not jsonl_files:
            print("No .jsonl files found in directory.")
            sys.exit(0)
        print(f"Found {len(jsonl_files)} .jsonl files")
        print()

        all_stats = []
        file_stats = []
        for i, jsonl_file in enumerate(jsonl_files):
            stats = analyze_single_file(str(jsonl_file))
            if stats:
                all_stats.append(stats)
                file_stats.append((str(jsonl_file), stats))
            # Progress indicator
            if (i + 1) % 10 == 0 or i + 1 == len(jsonl_files):
                print(f"\rProcessed {i + 1}/{len(jsonl_files)} files...", end="", flush=True)
        print("\n")

        if not all_stats:
            print("No tool calls found in any files.")
            sys.exit(0)

        # Merge and print consolidated report
        merged = merge_statistics(all_stats)
        print_consolidated_report(merged, file_stats)

        # JSON output
        print()
        print("-" * 70)
        print("RAW STATISTICS (JSON)")
        print("-" * 70)
        print(json.dumps({
            'files_analyzed': merged['file_count'],
            'total_calls': merged['total_calls'],
            'unique_tools': merged['unique_tools'],
            'avg_calls_per_session': round(merged['total_calls'] / merged['file_count'], 1),
            'tool_counts': merged['tool_counts'],
            'bash_program_counts': merged['bash_program_counts'],
        }, indent=2))
    else:
        # Single file mode
        print(f"Analyzing: {target_path}")
        print()
        tool_calls = extract_tool_calls(str(target_path))
        if not tool_calls:
            print("No tool calls found in the file.")
            sys.exit(0)

        stats = generate_statistics(tool_calls)
        print_report(stats)

        # JSON output
        print()
        print("-" * 60)
        print("RAW STATISTICS (JSON)")
        print("-" * 60)
        print(json.dumps({
            'total_calls': stats['total_calls'],
            'unique_tools': stats['unique_tools'],
            'tool_counts': stats['tool_counts'],
            'bash_program_counts': stats.get('bash_program_counts', {}),
        }, indent=2))


if __name__ == '__main__':
    main()
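
# Example invocations (assuming the script is saved as analyze_tools.py; the
# directory path below is illustrative — point it at wherever your Claude Code
# session JSONL files live):
#
#   python analyze_tools.py session.jsonl
#   python analyze_tools.py /path/to/sessions/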