Analyze Claude Code session JSONL files for tool usage statistics
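The script walks each JSONL record and keeps only the message.content items whose type is "tool_use"; everything else in a session record is ignored. Below is a minimal sketch of the record shape it assumes, showing only the fields the code actually reads (illustrative values; other fields and the exact layout may differ between Claude Code versions):

# Assumed shape of one session record (illustrative values only).
record = {
    "uuid": "0000-demo",                  # copied into each extracted tool call
    "timestamp": "2026-01-08T19:18:00Z",  # copied into each extracted tool call
    "message": {
        "content": [                      # must be a list; string content is skipped
            {"type": "text", "text": "Counting directory entries."},  # ignored
            {                             # this is what gets collected
                "type": "tool_use",
                "id": "toolu_demo",
                "name": "Bash",
                "input": {"command": "ls -la | wc -l",
                          "description": "Count directory entries"},
            },
        ],
    },
}
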
#!/usr/bin/env python3
"""
Analyze tool usage from a Claude Code JSONL session file.
Extracts all tool_use messages and generates statistics.
"""
import json
import re
import shlex
import sys
from collections import Counter
from pathlib import Path

def parse_bash_command(command: str) -> dict:
    """
    Parse a bash command to extract the program(s) being called.
    Returns info about the command structure.
    """
    if not command:
        return {'programs': [], 'raw': ''}

    # Remove comments (lines starting with # or inline comments)
    command = re.sub(r'#.*$', '', command, flags=re.MULTILINE)
    # Split on ||, &&, |, or ; to get the individual commands
    # ('||' is listed before '|' in the alternation so it is matched first)
    parts = re.split(r'\s*(?:\|\||&&|\||;)\s*', command)
    programs = []
    for part in parts:
        part = part.strip()
        if not part:
            continue
        # Skip if it starts with a comment
        if part.startswith('#'):
            continue
        # Handle subshells and command substitution
        part = re.sub(r'^\(|\)$', '', part)
        part = re.sub(r'^\$\(|\)$', '', part)
        # Skip variable assignments at the start
        if re.match(r'^[A-Za-z_][A-Za-z0-9_]*=', part):
            # Check if there's a command after the assignment
            match = re.search(r'^[A-Za-z_][A-Za-z0-9_]*=\S+\s+(.+)', part)
            if match:
                part = match.group(1)
            else:
                continue
        # Handle env/sudo/time prefixes
        prefixes = ['env', 'sudo', 'time', 'nice', 'nohup', 'xargs']
        while True:
            first_word = part.split()[0] if part.split() else ''
            if first_word in prefixes:
                rest = part.split(maxsplit=1)
                part = rest[1] if len(rest) > 1 else ''
            else:
                break
        if not part:
            continue
        # Get the actual program name
        try:
            tokens = shlex.split(part)
            if tokens:
                prog = tokens[0]
                # Handle path-qualified commands
                prog = prog.split('/')[-1]
                # Filter out things that don't look like program names
                if prog and re.match(r'^[a-zA-Z][a-zA-Z0-9_.-]*$', prog):
                    programs.append(prog)
        except ValueError:
            # shlex failed, try simple split
            first = part.split()[0] if part.split() else ''
            if first:
                prog = first.split('/')[-1]
                if prog and re.match(r'^[a-zA-Z][a-zA-Z0-9_.-]*$', prog):
                    programs.append(prog)

    return {
        'programs': programs,
        'raw': command[:100],
    }

def extract_tool_calls(jsonl_path: str) -> list[dict]:
    """Extract all tool_use calls from a JSONL file."""
    tool_calls = []
    with open(jsonl_path, 'r') as f:
        for line_num, line in enumerate(f, 1):
            line = line.strip()
            if not line:
                continue
            try:
                entry = json.loads(line)
            except json.JSONDecodeError as e:
                print(f"Warning: Failed to parse line {line_num}: {e}", file=sys.stderr)
                continue
            # Look for message.content which contains tool_use entries
            message = entry.get('message', {})
            content = message.get('content', [])
            # content can be a string or a list
            if not isinstance(content, list):
                continue
            for item in content:
                if isinstance(item, dict) and item.get('type') == 'tool_use':
                    tool_info = {
                        'name': item.get('name'),
                        'id': item.get('id'),
                        'input': item.get('input', {}),
                        'timestamp': entry.get('timestamp'),
                        'uuid': entry.get('uuid'),
                    }
                    # Parse Bash commands to extract programs
                    if item.get('name') == 'Bash':
                        cmd = item.get('input', {}).get('command', '')
                        tool_info['bash_parsed'] = parse_bash_command(cmd)
                    tool_calls.append(tool_info)
    return tool_calls

def generate_statistics(tool_calls: list[dict]) -> dict:
    """Generate statistics from tool calls."""
    tool_names = [tc['name'] for tc in tool_calls]
    tool_counts = Counter(tool_names)

    # Extract programs called via Bash
    bash_programs = []
    bash_commands = []
    for tc in tool_calls:
        if tc['name'] == 'Bash' and 'bash_parsed' in tc:
            bash_programs.extend(tc['bash_parsed']['programs'])
            bash_commands.append({
                'command': tc['input'].get('command', ''),
                'description': tc['input'].get('description', ''),
                'programs': tc['bash_parsed']['programs'],
            })
    bash_program_counts = Counter(bash_programs)

    # Extract more details per tool
    tool_details = {}
    for tc in tool_calls:
        name = tc['name']
        if name not in tool_details:
            tool_details[name] = []
        # For Bash, extract the command description if available
        if name == 'Bash':
            desc = tc['input'].get('description', tc['input'].get('command', '')[:50])
            tool_details[name].append(desc)
        elif name == 'Read':
            tool_details[name].append(tc['input'].get('file_path', ''))
        elif name == 'Write':
            tool_details[name].append(tc['input'].get('file_path', ''))
        elif name == 'Edit':
            tool_details[name].append(tc['input'].get('file_path', ''))
        elif name == 'Glob':
            tool_details[name].append(tc['input'].get('pattern', ''))
        elif name == 'Grep':
            tool_details[name].append(tc['input'].get('pattern', ''))
        else:
            tool_details[name].append(str(tc['input'])[:80])

    return {
        'total_calls': len(tool_calls),
        'unique_tools': len(tool_counts),
        'tool_counts': dict(tool_counts.most_common()),
        'tool_details': tool_details,
        'bash_program_counts': dict(bash_program_counts.most_common()),
        'bash_commands': bash_commands,
    }

def print_report(stats: dict):
    """Print a formatted report of tool usage statistics."""
    print("=" * 60)
    print("TOOL USAGE STATISTICS")
    print("=" * 60)
    print()
    print(f"Total tool calls: {stats['total_calls']}")
    print(f"Unique tools used: {stats['unique_tools']}")
    print()

    print("-" * 60)
    print("TOOL CALL FREQUENCY (sorted by count)")
    print("-" * 60)
    for tool, count in stats['tool_counts'].items():
        pct = (count / stats['total_calls']) * 100
        bar = "█" * int(pct / 2)
        print(f"{tool:20} {count:4} ({pct:5.1f}%) {bar}")

    # Bash program breakdown
    if stats.get('bash_program_counts'):
        print()
        print("-" * 60)
        print("PROGRAMS CALLED VIA BASH (sorted by count)")
        print("-" * 60)
        total_programs = sum(stats['bash_program_counts'].values())
        for prog, count in stats['bash_program_counts'].items():
            pct = (count / total_programs) * 100
            bar = "▓" * int(pct / 2)
            print(f"{prog:20} {count:4} ({pct:5.1f}%) {bar}")

        print()
        print("-" * 60)
        print("BASH COMMAND DETAILS")
        print("-" * 60)
        for cmd_info in stats['bash_commands'][:15]:  # Show first 15
            progs = ', '.join(cmd_info['programs']) if cmd_info['programs'] else '(none)'
            desc = cmd_info['description'] or cmd_info['command'][:50]
            print(f"  [{progs:20}] {desc[:50]}")

    print()
    print("-" * 60)
    print("SAMPLE DETAILS PER TOOL")
    print("-" * 60)
    for tool, details in stats['tool_details'].items():
        print(f"\n{tool} (showing up to 5 samples):")
        for detail in details[:5]:
            if detail:
                print(f"  • {detail[:70]}{'...' if len(str(detail)) > 70 else ''}")

def find_jsonl_files(directory: str) -> list[Path]:
    """Recursively find all .jsonl files in a directory."""
    dir_path = Path(directory)
    return sorted(dir_path.rglob("*.jsonl"))

def analyze_single_file(jsonl_path: str) -> dict | None:
    """Analyze a single file and return stats."""
    tool_calls = extract_tool_calls(jsonl_path)
    if not tool_calls:
        return None
    return generate_statistics(tool_calls)

def merge_statistics(all_stats: list[dict]) -> dict:
    """Merge statistics from multiple files into a consolidated summary."""
    merged = {
        'total_calls': 0,
        'unique_tools': set(),
        'tool_counts': Counter(),
        'bash_program_counts': Counter(),
        'bash_commands': [],
        'tool_details': {},
        'file_count': len(all_stats),
        'per_file_stats': [],
    }
    for stats in all_stats:
        merged['total_calls'] += stats['total_calls']
        merged['unique_tools'].update(stats['tool_counts'].keys())
        merged['tool_counts'].update(stats['tool_counts'])
        merged['bash_program_counts'].update(stats.get('bash_program_counts', {}))
        merged['bash_commands'].extend(stats.get('bash_commands', [])[:5])  # Sample from each
        # Merge tool details
        for tool, details in stats.get('tool_details', {}).items():
            if tool not in merged['tool_details']:
                merged['tool_details'][tool] = []
            merged['tool_details'][tool].extend(details[:3])  # Sample from each

    # Convert back to regular types for output
    merged['unique_tools'] = len(merged['unique_tools'])
    merged['tool_counts'] = dict(merged['tool_counts'].most_common())
    merged['bash_program_counts'] = dict(merged['bash_program_counts'].most_common())
    return merged

def print_consolidated_report(stats: dict, file_stats: list[tuple[str, dict]]):
    """Print a consolidated report across multiple files."""
    print("=" * 70)
    print("CONSOLIDATED TOOL USAGE STATISTICS")
    print("=" * 70)
    print()
    print(f"Files analyzed: {stats['file_count']}")
    print(f"Total tool calls: {stats['total_calls']}")
    print(f"Unique tools used: {stats['unique_tools']}")
    print(f"Average calls per session: {stats['total_calls'] / stats['file_count']:.1f}")
    print()

    # Per-file summary
    print("-" * 70)
    print("PER-SESSION BREAKDOWN")
    print("-" * 70)
    print(f"{'Session ID':<40} {'Calls':>8} {'Tools':>8}")
    print("-" * 70)
    for filepath, fstats in sorted(file_stats, key=lambda x: -x[1]['total_calls'])[:20]:
        session_id = Path(filepath).stem[:36]
        print(f"{session_id:<40} {fstats['total_calls']:>8} {fstats['unique_tools']:>8}")
    if len(file_stats) > 20:
        print(f"  ... and {len(file_stats) - 20} more sessions")

    print()
    print("-" * 70)
    print("TOOL CALL FREQUENCY (sorted by count)")
    print("-" * 70)
    max_count = max(stats['tool_counts'].values()) if stats['tool_counts'] else 1
    for tool, count in stats['tool_counts'].items():
        pct = (count / stats['total_calls']) * 100
        bar_len = int((count / max_count) * 40)
        bar = "█" * bar_len
        print(f"{tool:20} {count:6} ({pct:5.1f}%) {bar}")

    # Bash program breakdown
    if stats.get('bash_program_counts'):
        print()
        print("-" * 70)
        print("PROGRAMS CALLED VIA BASH (sorted by count)")
        print("-" * 70)
        total_programs = sum(stats['bash_program_counts'].values())
        max_prog_count = max(stats['bash_program_counts'].values()) if stats['bash_program_counts'] else 1
        # Show top 25 programs
        for i, (prog, count) in enumerate(stats['bash_program_counts'].items()):
            if i >= 25:
                remaining = len(stats['bash_program_counts']) - 25
                print(f"  ... and {remaining} more programs")
                break
            pct = (count / total_programs) * 100
            bar_len = int((count / max_prog_count) * 30)
            bar = "▓" * bar_len
            print(f"{prog:20} {count:6} ({pct:5.1f}%) {bar}")

    print()
    print("-" * 70)
    print("SAMPLE DETAILS PER TOOL (from all sessions)")
    print("-" * 70)
    for tool, details in stats['tool_details'].items():
        # Deduplicate and show samples
        unique_details = list(dict.fromkeys(d for d in details if d))[:5]
        print(f"\n{tool} ({stats['tool_counts'].get(tool, 0)} total calls, showing samples):")
        for detail in unique_details:
            if detail:
                print(f"  • {str(detail)[:70]}{'...' if len(str(detail)) > 70 else ''}")

def main():
    if len(sys.argv) < 2:
        print("Usage: python analyze_tools.py <jsonl_file_or_directory>")
        print()
        print("  Single file: python analyze_tools.py session.jsonl")
        print("  Directory:   python analyze_tools.py /path/to/sessions/")
        sys.exit(1)

    target_path = Path(sys.argv[1])
    if not target_path.exists():
        print(f"Error: Path not found: {target_path}")
        sys.exit(1)

    # Check if it's a directory or single file
    if target_path.is_dir():
        # Recursive directory mode
        print(f"Scanning directory: {target_path}")
        jsonl_files = find_jsonl_files(target_path)
        if not jsonl_files:
            print("No .jsonl files found in directory.")
            sys.exit(0)
        print(f"Found {len(jsonl_files)} .jsonl files")
        print()

        all_stats = []
        file_stats = []
        for i, jsonl_file in enumerate(jsonl_files):
            stats = analyze_single_file(str(jsonl_file))
            if stats:
                all_stats.append(stats)
                file_stats.append((str(jsonl_file), stats))
            # Progress indicator
            if (i + 1) % 10 == 0 or i + 1 == len(jsonl_files):
                print(f"\rProcessed {i + 1}/{len(jsonl_files)} files...", end="", flush=True)
        print("\n")

        if not all_stats:
            print("No tool calls found in any files.")
            sys.exit(0)

        # Merge and print consolidated report
        merged = merge_statistics(all_stats)
        print_consolidated_report(merged, file_stats)

        # JSON output
        print()
        print("-" * 70)
        print("RAW STATISTICS (JSON)")
        print("-" * 70)
        print(json.dumps({
            'files_analyzed': merged['file_count'],
            'total_calls': merged['total_calls'],
            'unique_tools': merged['unique_tools'],
            'avg_calls_per_session': round(merged['total_calls'] / merged['file_count'], 1),
            'tool_counts': merged['tool_counts'],
            'bash_program_counts': merged['bash_program_counts'],
        }, indent=2))
    else:
        # Single file mode
        print(f"Analyzing: {target_path}")
        print()
        tool_calls = extract_tool_calls(str(target_path))
        if not tool_calls:
            print("No tool calls found in the file.")
            sys.exit(0)
        stats = generate_statistics(tool_calls)
        print_report(stats)

        # JSON output
        print()
        print("-" * 60)
        print("RAW STATISTICS (JSON)")
        print("-" * 60)
        print(json.dumps({
            'total_calls': stats['total_calls'],
            'unique_tools': stats['unique_tools'],
            'tool_counts': stats['tool_counts'],
            'bash_program_counts': stats.get('bash_program_counts', {}),
        }, indent=2))

if __name__ == '__main__':
    main()
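
A quick way to sanity-check the pipeline without a real session is to feed it a single hand-written record. This is a minimal sketch that assumes the script above is saved as analyze_tools.py (the filename its own usage text suggests); the record contents are illustrative:

# Smoke test: write one synthetic record to a temp .jsonl file and analyze it.
import json
import tempfile

from analyze_tools import extract_tool_calls, generate_statistics  # assumed filename

record = {
    "uuid": "0000-demo",
    "timestamp": "2026-01-08T19:18:00Z",
    "message": {"content": [{
        "type": "tool_use",
        "id": "toolu_demo",
        "name": "Bash",
        "input": {"command": "git status && git log -1",
                  "description": "Check repo state"},
    }]},
}

with tempfile.NamedTemporaryFile("w", suffix=".jsonl", delete=False) as f:
    f.write(json.dumps(record) + "\n")

stats = generate_statistics(extract_tool_calls(f.name))
print(stats["tool_counts"])          # expected: {'Bash': 1}
print(stats["bash_program_counts"])  # expected: {'git': 2}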