@swairshah
Created January 8, 2026 19:18
Analyze Claude Code session JSONL files for tool usage statistics
#!/usr/bin/env python3
"""
Analyze tool usage from a Claude Code JSONL session file.
Extracts all tool_use messages and generates statistics.
"""
import json
import re
import shlex
import sys
from collections import Counter
from pathlib import Path

def parse_bash_command(command: str) -> dict:
    """
    Parse a bash command to extract the program(s) being called.
    Returns info about the command structure.
    """
    if not command:
        return {'programs': [], 'raw': ''}

    # Remove comments (lines starting with # or inline comments)
    command = re.sub(r'#.*$', '', command, flags=re.MULTILINE)

    # Split on ||, &&, ;, and | to get the individual commands
    parts = re.split(r'\s*(?:\|\||&&|;|\|)\s*', command)

    programs = []
    for part in parts:
        part = part.strip()
        if not part:
            continue

        # Skip if it starts with a comment
        if part.startswith('#'):
            continue

        # Handle subshells and command substitution
        part = re.sub(r'^\(|\)$', '', part)
        part = re.sub(r'^\$\(|\)$', '', part)

        # Skip variable assignments at the start
        if re.match(r'^[A-Za-z_][A-Za-z0-9_]*=', part):
            # Check if there's a command after the assignment
            match = re.search(r'^[A-Za-z_][A-Za-z0-9_]*=\S+\s+(.+)', part)
            if match:
                part = match.group(1)
            else:
                continue

        # Handle env/sudo/time prefixes
        prefixes = ['env', 'sudo', 'time', 'nice', 'nohup', 'xargs']
        while True:
            first_word = part.split()[0] if part.split() else ''
            if first_word in prefixes:
                rest = part.split(maxsplit=1)
                part = rest[1] if len(rest) > 1 else ''
            else:
                break

        if not part:
            continue

        # Get the actual program name
        try:
            tokens = shlex.split(part)
            if tokens:
                prog = tokens[0]
                # Handle path-qualified commands
                prog = prog.split('/')[-1]
                # Filter out things that don't look like program names
                if prog and re.match(r'^[a-zA-Z][a-zA-Z0-9_.-]*$', prog):
                    programs.append(prog)
        except ValueError:
            # shlex failed, try simple split
            first = part.split()[0] if part.split() else ''
            if first:
                prog = first.split('/')[-1]
                if prog and re.match(r'^[a-zA-Z][a-zA-Z0-9_.-]*$', prog):
                    programs.append(prog)

    return {
        'programs': programs,
        'raw': command[:100],
    }

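# Illustrative example of what the parser above yields (not executed; the result
# shown is worked out by hand from the rules above, not captured output):
#
#   parse_bash_command("FOO=bar sudo git status && cat a.txt | grep -n TODO")
#   # -> {'programs': ['git', 'cat', 'grep'],
#   #     'raw': 'FOO=bar sudo git status && cat a.txt | grep -n TODO'}
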
def extract_tool_calls(jsonl_path: str) -> list[dict]:
    """Extract all tool_use calls from a JSONL file."""
    tool_calls = []
    with open(jsonl_path, 'r') as f:
        for line_num, line in enumerate(f, 1):
            line = line.strip()
            if not line:
                continue
            try:
                entry = json.loads(line)
            except json.JSONDecodeError as e:
                print(f"Warning: Failed to parse line {line_num}: {e}", file=sys.stderr)
                continue

            # Look for message.content which contains tool_use entries
            message = entry.get('message', {})
            content = message.get('content', [])

            # content can be a string or a list
            if not isinstance(content, list):
                continue

            for item in content:
                if isinstance(item, dict) and item.get('type') == 'tool_use':
                    tool_info = {
                        'name': item.get('name'),
                        'id': item.get('id'),
                        'input': item.get('input', {}),
                        'timestamp': entry.get('timestamp'),
                        'uuid': entry.get('uuid'),
                    }
                    # Parse Bash commands to extract programs
                    if item.get('name') == 'Bash':
                        cmd = item.get('input', {}).get('command', '')
                        tool_info['bash_parsed'] = parse_bash_command(cmd)
                    tool_calls.append(tool_info)
    return tool_calls

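# For reference, the entry shape this function expects looks roughly like the
# following (inferred from the fields accessed above; real Claude Code session
# lines carry additional keys that are simply ignored here):
#
#   {"uuid": "...", "timestamp": "...",
#    "message": {"content": [{"type": "tool_use", "id": "...", "name": "Bash",
#                             "input": {"command": "ls", "description": "List files"}}]}}
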
def generate_statistics(tool_calls: list[dict]) -> dict:
    """Generate statistics from tool calls."""
    tool_names = [tc['name'] for tc in tool_calls]
    tool_counts = Counter(tool_names)

    # Extract programs called via Bash
    bash_programs = []
    bash_commands = []
    for tc in tool_calls:
        if tc['name'] == 'Bash' and 'bash_parsed' in tc:
            bash_programs.extend(tc['bash_parsed']['programs'])
            bash_commands.append({
                'command': tc['input'].get('command', ''),
                'description': tc['input'].get('description', ''),
                'programs': tc['bash_parsed']['programs'],
            })
    bash_program_counts = Counter(bash_programs)

    # Extract more details per tool
    tool_details = {}
    for tc in tool_calls:
        name = tc['name']
        if name not in tool_details:
            tool_details[name] = []
        # For Bash, extract the command description if available
        if name == 'Bash':
            desc = tc['input'].get('description', tc['input'].get('command', '')[:50])
            tool_details[name].append(desc)
        elif name in ('Read', 'Write', 'Edit'):
            tool_details[name].append(tc['input'].get('file_path', ''))
        elif name in ('Glob', 'Grep'):
            tool_details[name].append(tc['input'].get('pattern', ''))
        else:
            tool_details[name].append(str(tc['input'])[:80])

    return {
        'total_calls': len(tool_calls),
        'unique_tools': len(tool_counts),
        'tool_counts': dict(tool_counts.most_common()),
        'tool_details': tool_details,
        'bash_program_counts': dict(bash_program_counts.most_common()),
        'bash_commands': bash_commands,
    }

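# Shape of the returned stats dict (illustrative values only):
#
#   {'total_calls': 42, 'unique_tools': 4,
#    'tool_counts': {'Bash': 20, 'Read': 12, 'Edit': 8, 'Grep': 2},
#    'tool_details': {'Bash': ['Run tests', ...], 'Read': ['/path/to/file.py', ...], ...},
#    'bash_program_counts': {'python': 9, 'git': 6, ...},
#    'bash_commands': [{'command': '...', 'description': '...', 'programs': [...]}, ...]}
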
def print_report(stats: dict):
    """Print a formatted report of tool usage statistics."""
    print("=" * 60)
    print("TOOL USAGE STATISTICS")
    print("=" * 60)
    print()
    print(f"Total tool calls: {stats['total_calls']}")
    print(f"Unique tools used: {stats['unique_tools']}")
    print()

    print("-" * 60)
    print("TOOL CALL FREQUENCY (sorted by count)")
    print("-" * 60)
    for tool, count in stats['tool_counts'].items():
        pct = (count / stats['total_calls']) * 100
        bar = "█" * int(pct / 2)
        print(f"{tool:20} {count:4} ({pct:5.1f}%) {bar}")

    # Bash program breakdown
    if stats.get('bash_program_counts'):
        print()
        print("-" * 60)
        print("PROGRAMS CALLED VIA BASH (sorted by count)")
        print("-" * 60)
        total_programs = sum(stats['bash_program_counts'].values())
        for prog, count in stats['bash_program_counts'].items():
            pct = (count / total_programs) * 100
            bar = "▓" * int(pct / 2)
            print(f"{prog:20} {count:4} ({pct:5.1f}%) {bar}")

        print()
        print("-" * 60)
        print("BASH COMMAND DETAILS")
        print("-" * 60)
        for cmd_info in stats['bash_commands'][:15]:  # Show first 15
            progs = ', '.join(cmd_info['programs']) if cmd_info['programs'] else '(none)'
            desc = cmd_info['description'] or cmd_info['command'][:50]
            print(f" [{progs:20}] {desc[:50]}")

    print()
    print("-" * 60)
    print("SAMPLE DETAILS PER TOOL")
    print("-" * 60)
    for tool, details in stats['tool_details'].items():
        print(f"\n{tool} (showing up to 5 samples):")
        for detail in details[:5]:
            if detail:
                print(f" • {detail[:70]}{'...' if len(str(detail)) > 70 else ''}")

def find_jsonl_files(directory: str) -> list[Path]:
    """Recursively find all .jsonl files in a directory."""
    dir_path = Path(directory)
    return sorted(dir_path.rglob("*.jsonl"))

def analyze_single_file(jsonl_path: str) -> dict | None:
    """Analyze a single file and return stats."""
    tool_calls = extract_tool_calls(jsonl_path)
    if not tool_calls:
        return None
    return generate_statistics(tool_calls)

def merge_statistics(all_stats: list[dict]) -> dict:
    """Merge statistics from multiple files into a consolidated summary."""
    merged = {
        'total_calls': 0,
        'unique_tools': set(),
        'tool_counts': Counter(),
        'bash_program_counts': Counter(),
        'bash_commands': [],
        'tool_details': {},
        'file_count': len(all_stats),
        'per_file_stats': [],
    }
    for stats in all_stats:
        merged['total_calls'] += stats['total_calls']
        merged['unique_tools'].update(stats['tool_counts'].keys())
        merged['tool_counts'].update(stats['tool_counts'])
        merged['bash_program_counts'].update(stats.get('bash_program_counts', {}))
        merged['bash_commands'].extend(stats.get('bash_commands', [])[:5])  # Sample from each

        # Merge tool details
        for tool, details in stats.get('tool_details', {}).items():
            if tool not in merged['tool_details']:
                merged['tool_details'][tool] = []
            merged['tool_details'][tool].extend(details[:3])  # Sample from each

    # Convert back to regular types for output
    merged['unique_tools'] = len(merged['unique_tools'])
    merged['tool_counts'] = dict(merged['tool_counts'].most_common())
    merged['bash_program_counts'] = dict(merged['bash_program_counts'].most_common())
    return merged

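# Note: to keep the consolidated report compact, merge_statistics samples rather
# than fully merges the bulky fields: up to 5 bash commands and up to 3 tool-detail
# entries are taken from each file, while the counters are merged in full.
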
def print_consolidated_report(stats: dict, file_stats: list[tuple[str, dict]]):
    """Print a consolidated report across multiple files."""
    print("=" * 70)
    print("CONSOLIDATED TOOL USAGE STATISTICS")
    print("=" * 70)
    print()
    print(f"Files analyzed: {stats['file_count']}")
    print(f"Total tool calls: {stats['total_calls']}")
    print(f"Unique tools used: {stats['unique_tools']}")
    print(f"Average calls per session: {stats['total_calls'] / stats['file_count']:.1f}")
    print()

    # Per-file summary
    print("-" * 70)
    print("PER-SESSION BREAKDOWN")
    print("-" * 70)
    print(f"{'Session ID':<40} {'Calls':>8} {'Tools':>8}")
    print("-" * 70)
    for filepath, fstats in sorted(file_stats, key=lambda x: -x[1]['total_calls'])[:20]:
        session_id = Path(filepath).stem[:36]
        print(f"{session_id:<40} {fstats['total_calls']:>8} {fstats['unique_tools']:>8}")
    if len(file_stats) > 20:
        print(f" ... and {len(file_stats) - 20} more sessions")
    print()

    print("-" * 70)
    print("TOOL CALL FREQUENCY (sorted by count)")
    print("-" * 70)
    max_count = max(stats['tool_counts'].values()) if stats['tool_counts'] else 1
    for tool, count in stats['tool_counts'].items():
        pct = (count / stats['total_calls']) * 100
        bar_len = int((count / max_count) * 40)
        bar = "█" * bar_len
        print(f"{tool:20} {count:6} ({pct:5.1f}%) {bar}")

    # Bash program breakdown
    if stats.get('bash_program_counts'):
        print()
        print("-" * 70)
        print("PROGRAMS CALLED VIA BASH (sorted by count)")
        print("-" * 70)
        total_programs = sum(stats['bash_program_counts'].values())
        max_prog_count = max(stats['bash_program_counts'].values()) if stats['bash_program_counts'] else 1
        # Show top 25 programs
        for i, (prog, count) in enumerate(stats['bash_program_counts'].items()):
            if i >= 25:
                remaining = len(stats['bash_program_counts']) - 25
                print(f" ... and {remaining} more programs")
                break
            pct = (count / total_programs) * 100
            bar_len = int((count / max_prog_count) * 30)
            bar = "▓" * bar_len
            print(f"{prog:20} {count:6} ({pct:5.1f}%) {bar}")

    print()
    print("-" * 70)
    print("SAMPLE DETAILS PER TOOL (from all sessions)")
    print("-" * 70)
    for tool, details in stats['tool_details'].items():
        # Deduplicate and show samples
        unique_details = list(dict.fromkeys(d for d in details if d))[:5]
        print(f"\n{tool} ({stats['tool_counts'].get(tool, 0)} total calls, showing samples):")
        for detail in unique_details:
            if detail:
                print(f" • {str(detail)[:70]}{'...' if len(str(detail)) > 70 else ''}")

def main():
    if len(sys.argv) < 2:
        print("Usage: python analyze_tools.py <jsonl_file_or_directory>")
        print()
        print(" Single file: python analyze_tools.py session.jsonl")
        print(" Directory: python analyze_tools.py /path/to/sessions/")
        sys.exit(1)

    target_path = Path(sys.argv[1])
    if not target_path.exists():
        print(f"Error: Path not found: {target_path}")
        sys.exit(1)

    # Check if it's a directory or single file
    if target_path.is_dir():
        # Recursive directory mode
        print(f"Scanning directory: {target_path}")
        jsonl_files = find_jsonl_files(target_path)
        if not jsonl_files:
            print("No .jsonl files found in directory.")
            sys.exit(0)
        print(f"Found {len(jsonl_files)} .jsonl files")
        print()

        all_stats = []
        file_stats = []
        for i, jsonl_file in enumerate(jsonl_files):
            stats = analyze_single_file(str(jsonl_file))
            if stats:
                all_stats.append(stats)
                file_stats.append((str(jsonl_file), stats))
            # Progress indicator
            if (i + 1) % 10 == 0 or i + 1 == len(jsonl_files):
                print(f"\rProcessed {i + 1}/{len(jsonl_files)} files...", end="", flush=True)
        print("\n")

        if not all_stats:
            print("No tool calls found in any files.")
            sys.exit(0)

        # Merge and print consolidated report
        merged = merge_statistics(all_stats)
        print_consolidated_report(merged, file_stats)

        # JSON output
        print()
        print("-" * 70)
        print("RAW STATISTICS (JSON)")
        print("-" * 70)
        print(json.dumps({
            'files_analyzed': merged['file_count'],
            'total_calls': merged['total_calls'],
            'unique_tools': merged['unique_tools'],
            'avg_calls_per_session': round(merged['total_calls'] / merged['file_count'], 1),
            'tool_counts': merged['tool_counts'],
            'bash_program_counts': merged['bash_program_counts'],
        }, indent=2))
    else:
        # Single file mode
        print(f"Analyzing: {target_path}")
        print()
        tool_calls = extract_tool_calls(str(target_path))
        if not tool_calls:
            print("No tool calls found in the file.")
            sys.exit(0)

        stats = generate_statistics(tool_calls)
        print_report(stats)

        # JSON output
        print()
        print("-" * 60)
        print("RAW STATISTICS (JSON)")
        print("-" * 60)
        print(json.dumps({
            'total_calls': stats['total_calls'],
            'unique_tools': stats['unique_tools'],
            'tool_counts': stats['tool_counts'],
            'bash_program_counts': stats.get('bash_program_counts', {}),
        }, indent=2))


if __name__ == '__main__':
    main()
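
# Example invocations (assuming the script is saved as analyze_tools.py; the
# directory path below is illustrative — point it at wherever your Claude Code
# session JSONL files live):
#
#   python analyze_tools.py session.jsonl
#   python analyze_tools.py /path/to/sessions/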