scottopell/claude_bash_cmds.py

## claude_bash_cmds.py
#!/usr/bin/env -S uv run
# /// script
# requires-python = ">=3.10"
# dependencies = ["bashlex"]
# ///

"""Extract and analyze all Bash commands from Claude Code conversation transcripts.

Phase 1: Scan ~/.claude/projects/**/*.jsonl, extract every Bash tool_use command,
         write commands.jsonl to the current directory.
Phase 2: Parse each command into a shell AST via bashlex, extract simple command
         names (e.g. "git", "cat", "grep"), and print a frequency table.

Usage:
    uv run claude_bash_cmds.py
"""

import glob
import json
import os
import sys
from collections import Counter

import bashlex
from bashlex import ast

# ── Phase 1: Extract ─────────────────────────────────────────────────────────


def parse_project_and_subagent(filepath):
    """Derive the project name and subagent flag from a transcript path.

    The path is split on ``os.sep``. The project is the component that
    immediately follows the first component equal to ``"projects"``; it is
    ``None`` when no such component exists or nothing follows it. The
    subagent flag is true when any component equals ``"subagents"``.

    Returns:
        A ``(project, is_subagent)`` tuple.
    """
    segments = filepath.split(os.sep)

    project = None
    if "projects" in segments:
        following = segments.index("projects") + 1
        if following < len(segments):
            project = segments[following]

    return project, "subagents" in segments


def extract_commands(filepath):
    """Yield one record per Bash ``tool_use`` block found in a transcript.

    The file is read as JSON Lines. Only entries whose ``type`` is
    ``"assistant"`` and whose ``message.content`` is a list are inspected;
    within that list, every ``tool_use`` block named ``"Bash"`` that carries
    a non-``None`` ``input.command`` produces one record.

    Blank lines are ignored. Lines that are not valid JSON are counted and
    reported in a single warning on stderr once the file has been fully
    consumed. Lines that are valid JSON but not objects (a bare list, string,
    number, ...) are skipped. A file that cannot be opened produces a warning
    on stderr and yields nothing.

    Args:
        filepath: Path to a ``.jsonl`` transcript file.

    Yields:
        dict with keys ``command``, ``timestamp``, ``sessionId``,
        ``project``, ``cwd`` and ``isSubagent``.
    """
    project, is_subagent = parse_project_and_subagent(filepath)
    errors = 0

    try:
        # errors="replace" keeps a stray invalid byte from aborting the scan.
        fh = open(filepath, "r", encoding="utf-8", errors="replace")
    except OSError as e:
        print(f"  Warning: cannot read {filepath}: {e}", file=sys.stderr)
        return

    with fh:
        for line in fh:
            line = line.strip()
            if not line:
                continue
            try:
                obj = json.loads(line)
            except json.JSONDecodeError:
                errors += 1
                continue

            # json.loads may return any JSON value; only objects support
            # .get(), so anything else is skipped instead of raising.
            if not isinstance(obj, dict) or obj.get("type") != "assistant":
                continue

            message = obj.get("message")
            if not isinstance(message, dict):
                continue

            content = message.get("content")
            if not isinstance(content, list):
                continue

            timestamp = obj.get("timestamp")
            session_id = obj.get("sessionId")
            cwd = obj.get("cwd")

            for block in content:
                if not isinstance(block, dict):
                    continue
                if block.get("type") != "tool_use" or block.get("name") != "Bash":
                    continue

                inp = block.get("input")
                if not isinstance(inp, dict):
                    continue

                command = inp.get("command")
                if command is None:
                    continue

                yield {
                    "command": command,
                    "timestamp": timestamp,
                    "sessionId": session_id,
                    "project": project,
                    "cwd": cwd,
                    "isSubagent": is_subagent,
                }

    if errors:
        print(f"  Warning: {errors} malformed JSON line(s) in {filepath}", file=sys.stderr)


def phase1_extract(output_path):
    """Scan every transcript and write the extracted Bash commands as JSONL.

    Recursively globs ``~/.claude/projects/**/*.jsonl``, runs
    ``extract_commands`` over each file in sorted order, and writes one JSON
    object per line to ``output_path`` (overwriting it). Progress messages go
    to stderr.

    Args:
        output_path: Destination file for the extracted command records.

    Returns:
        The number of command records written.
    """
    print("Phase 1: Extracting Bash commands from transcripts", file=sys.stderr)

    base = os.path.expanduser("~/.claude/projects")
    pattern = os.path.join(base, "**", "*.jsonl")
    files = sorted(glob.glob(pattern, recursive=True))
    print(f"  Scanning {len(files)} transcript files...", file=sys.stderr)

    total = 0
    # Records are serialized with ensure_ascii=False, so non-ASCII text is
    # written verbatim. Pin the encoding to UTF-8 so the write does not depend
    # on (or fail under) the platform's locale encoding.
    with open(output_path, "w", encoding="utf-8") as out:
        for filepath in files:
            for record in extract_commands(filepath):
                out.write(json.dumps(record, ensure_ascii=False))
                out.write("\n")
                total += 1

    print(f"  Wrote {total} commands to {output_path}", file=sys.stderr)
    return total


# ── Phase 2: Analyze ─────────────────────────────────────────────────────────


class CommandNameExtractor(ast.nodevisitor):
    """AST visitor that records the first word of every command node.

    After visiting, ``commands`` holds the collected words in visit order.
    """

    def __init__(self):
        # Collected command names, appended in the order nodes are visited.
        self.commands = []

    def visitcommand(self, n, parts):
        """Record the first ``word`` part of a command node, if it has one.

        Parts of any other kind that precede the first word are passed over.
        Always returns True.
        NOTE(review): presumably a truthy return tells bashlex to keep
        descending into child nodes — confirm against bashlex's nodevisitor.
        """
        first_word = next((p for p in parts if p.kind == "word"), None)
        if first_word is not None:
            self.commands.append(first_word.word)
        return True


def extract_command_names(shell_cmd: str) -> list[str]:
    """Return the simple command names found in a shell command string.

    The string is parsed with ``bashlex.parse`` and each resulting tree is
    walked by a ``CommandNameExtractor``. Any exception raised by the parser
    propagates to the caller.
    """
    trees = bashlex.parse(shell_cmd)
    collector = CommandNameExtractor()
    for tree in trees:
        collector.visit(tree)
    return collector.commands


def phase2_analyze(input_path):
    """Parse every extracted command and print a command-name frequency table.

    Reads ``input_path`` as JSON Lines, taking the ``command`` field of each
    record. Each command is passed to ``extract_command_names``; commands
    that raise are tallied by exception type rather than aborting the run.
    Progress and the error breakdown go to stderr; the frequency table and
    summary go to stdout.

    Blank lines in the input are ignored, and an input with no records
    reports a parse-error rate of 0.0% instead of dividing by zero.

    Args:
        input_path: JSONL file whose records each carry a ``command`` key.
    """
    print("\nPhase 2: Parsing shell ASTs and counting simple commands", file=sys.stderr)

    counts: Counter[str] = Counter()
    total = 0
    error_types: Counter[str] = Counter()

    # Decode as UTF-8 regardless of locale; undecodable bytes are replaced
    # rather than aborting the whole analysis.
    with open(input_path, encoding="utf-8", errors="replace") as f:
        for line in f:
            if not line.strip():
                continue
            record = json.loads(line)
            cmd = record["command"]
            total += 1

            try:
                counts.update(extract_command_names(cmd))
            except Exception as e:
                # Deliberately broad: any failure on one command is counted
                # by exception type and must not stop the remaining commands.
                error_types[type(e).__name__] += 1

    parse_errors = sum(error_types.values())
    parsed_ok = total - parse_errors

    print(f"  {parsed_ok}/{total} commands parsed successfully", file=sys.stderr)
    if error_types:
        breakdown = ", ".join(f"{v} {k}" for k, v in error_types.most_common())
        print(f"  {parse_errors} failed ({breakdown})", file=sys.stderr)
    print(file=sys.stderr)

    print("=== Simple Command Frequency ===")
    print()
    for name, count in counts.most_common():
        print(f"{count:>6}  {name}")
    print()
    print("---")
    print(f"Total input commands: {total}")
    print(f"Unique simple commands: {len(counts)}")
    print(f"Total simple command invocations: {sum(counts.values())}")
    # Guard against an empty input (no transcripts / no Bash commands found).
    error_pct = parse_errors * 100 / total if total else 0.0
    print(f"Parse errors: {parse_errors} ({error_pct:.1f}%)")


# ── Main ──────────────────────────────────────────────────────────────────────


def main():
    """Run both phases, using ``commands.jsonl`` in the current directory.

    Phase 1 writes the extracted commands to that file; phase 2 reads the
    same file back and prints the frequency table.
    """
    commands_file = "commands.jsonl"
    phase1_extract(commands_file)
    phase2_analyze(commands_file)


# Run the pipeline only when executed as a script, not when imported.
if __name__ == "__main__":
    main()
	#!/usr/bin/env -S uv run
	# /// script
	# requires-python = ">=3.10"
	# dependencies = ["bashlex"]
	# ///

	"""Extract and analyze all Bash commands from Claude Code conversation transcripts.

	Phase 1: Scan ~/.claude/projects/*/.jsonl, extract every Bash tool_use command,
	write commands.jsonl to the current directory.
	Phase 2: Parse each command into a shell AST via bashlex, extract simple command
	names (e.g. "git", "cat", "grep"), and print a frequency table.

	Usage:
	uv run analyze_claude_commands.py
	"""

	import glob
	import json
	import os
	import sys
	from collections import Counter

	import bashlex
	from bashlex import ast

	# ── Phase 1: Extract ─────────────────────────────────────────────────────────


	def parse_project_and_subagent(filepath):
	parts = filepath.split(os.sep)
	try:
	idx = parts.index("projects")
	project = parts[idx + 1] if idx + 1 < len(parts) else None
	except ValueError:
	project = None
	is_subagent = "subagents" in parts
	return project, is_subagent


	def extract_commands(filepath):
	project, is_subagent = parse_project_and_subagent(filepath)
	errors = 0

	try:
	fh = open(filepath, "r", encoding="utf-8", errors="replace")
	except IOError as e:
	print(f" Warning: cannot read {filepath}: {e}", file=sys.stderr)
	return

	with fh:
	for line in fh:
	line = line.strip()
	if not line:
	continue
	try:
	obj = json.loads(line)
	except json.JSONDecodeError:
	errors += 1
	continue

	if obj.get("type") != "assistant":
	continue

	message = obj.get("message")
	if not isinstance(message, dict):
	continue

	content = message.get("content")
	if not isinstance(content, list):
	continue

	timestamp = obj.get("timestamp")
	session_id = obj.get("sessionId")
	cwd = obj.get("cwd")

	for block in content:
	if not isinstance(block, dict):
	continue
	if block.get("type") != "tool_use" or block.get("name") != "Bash":
	continue

	inp = block.get("input")
	if not isinstance(inp, dict):
	continue

	command = inp.get("command")
	if command is None:
	continue

	yield {
	"command": command,
	"timestamp": timestamp,
	"sessionId": session_id,
	"project": project,
	"cwd": cwd,
	"isSubagent": is_subagent,
	}

	if errors:
	print(f" Warning: {errors} malformed JSON line(s) in {filepath}", file=sys.stderr)


	def phase1_extract(output_path):
	print("Phase 1: Extracting Bash commands from transcripts", file=sys.stderr)

	base = os.path.expanduser("~/.claude/projects")
	pattern = os.path.join(base, "*", ".jsonl")
	files = sorted(glob.glob(pattern, recursive=True))
	print(f" Scanning {len(files)} transcript files...", file=sys.stderr)

	total = 0
	with open(output_path, "w") as out:
	for filepath in files:
	for record in extract_commands(filepath):
	out.write(json.dumps(record, ensure_ascii=False))
	out.write("\n")
	total += 1

	print(f" Wrote {total} commands to {output_path}", file=sys.stderr)
	return total


	# ── Phase 2: Analyze ─────────────────────────────────────────────────────────


	class CommandNameExtractor(ast.nodevisitor):
	def __init__(self):
	self.commands = []

	def visitcommand(self, n, parts):
	for part in parts:
	if part.kind == "word":
	self.commands.append(part.word)
	break
	return True


	def extract_command_names(shell_cmd: str) -> list[str]:
	parts = bashlex.parse(shell_cmd)
	visitor = CommandNameExtractor()
	for part in parts:
	visitor.visit(part)
	return visitor.commands


	def phase2_analyze(input_path):
	print("\nPhase 2: Parsing shell ASTs and counting simple commands", file=sys.stderr)

	counts: Counter[str] = Counter()
	total = 0
	error_types: Counter[str] = Counter()

	with open(input_path) as f:
	for lineno, line in enumerate(f, 1):
	record = json.loads(line)
	cmd = record["command"]
	total += 1

	try:
	names = extract_command_names(cmd)
	counts.update(names)
	except Exception as e:
	error_types[type(e).__name__] += 1

	parse_errors = sum(error_types.values())
	parsed_ok = total - parse_errors

	print(f" {parsed_ok}/{total} commands parsed successfully", file=sys.stderr)
	if error_types:
	breakdown = ", ".join(f"{v} {k}" for k, v in error_types.most_common())
	print(f" {parse_errors} failed ({breakdown})", file=sys.stderr)
	print(file=sys.stderr)

	print("=== Simple Command Frequency ===")
	print()
	for name, count in counts.most_common():
	print(f"{count:>6} {name}")
	print()
	print("---")
	print(f"Total input commands: {total}")
	print(f"Unique simple commands: {len(counts)}")
	print(f"Total simple command invocations: {sum(counts.values())}")
	print(f"Parse errors: {parse_errors} ({parse_errors*100/total:.1f}%)")


	# ── Main ──────────────────────────────────────────────────────────────────────


	def main():
	output_path = "commands.jsonl"
	phase1_extract(output_path)
	phase2_analyze(output_path)


	if __name__ == "__main__":
	main()
No results found