Tolsi/extract_prompts.py

## extract_prompts.py
#!/usr/bin/env python3
"""
Extract user prompts from Claude Code project files.

This script walks through the ~/.claude/projects/ directory, parses the JSONL
files found directly inside each project directory, extracts user messages with
their timestamps, and saves them to text files in the current working
directory. Each output file is named after its project directory.
"""

import json
from pathlib import Path
from datetime import datetime
from typing import List, Tuple


def parse_timestamp(iso_timestamp: str) -> datetime:
    """Parse an ISO 8601 timestamp string into a datetime object.

    A trailing ``Z``/``z`` UTC designator is rewritten to ``+00:00`` before
    parsing, because ``datetime.fromisoformat`` only started accepting the
    ``Z`` suffix in Python 3.11.

    Args:
        iso_timestamp: Timestamp text such as ``2025-01-02T03:04:05.123Z``.

    Returns:
        The parsed datetime. It is timezone-aware when the input carries a
        UTC designator or an explicit offset, and naive otherwise.

    Raises:
        ValueError: If ``datetime.fromisoformat`` cannot parse the text.
    """
    # fromisoformat() handles an optional fractional-seconds part on its own,
    # so timestamps with and without microseconds share a single code path.
    if iso_timestamp.endswith(('Z', 'z')):
        iso_timestamp = iso_timestamp[:-1] + '+00:00'
    return datetime.fromisoformat(iso_timestamp)


# Substrings that mark content as something other than a typed prompt:
# XML-like tags (command messages, notifications, etc.) and system messages.
# Defined once here instead of being rebuilt for every line of every file.
_NON_PROMPT_MARKERS = (
    '<command-name>',
    '<command-message>',
    '<command-args>',
    '<local-command-stdout>',
    '<local-command-stderr>',
    '<local-command-caveat>',
    '<bash-notification>',
    '<shell-id>',
    '<output-file>',
    '<status>',
    '<summary>',
    'Caveat: The messages below were generated by the user while running local commands',
    'This session is being continued from a previous conversation',
)


def _prompt_text(record) -> str:
    """Return the prompt text carried by one parsed JSONL record, or ''.

    Two record shapes carry prompts: ``type == 'user'`` (text under
    ``message.content``; sidechain records are excluded) and
    ``type == 'queue-operation'`` with ``operation == 'enqueue'`` (text under
    ``content``). Non-string or empty content, the literal ``'Warmup'``, and
    text containing any ``_NON_PROMPT_MARKERS`` entry are rejected.
    """
    # A JSON line may decode to a list, number, string or null; only objects
    # can describe a prompt.
    if not isinstance(record, dict):
        return ''

    record_type = record.get('type')
    if record_type == 'user':
        if record.get('isSidechain', False):
            return ''
        # "message" may be present but null, so check the type rather than
        # relying on a .get() default.
        message = record.get('message')
        content = message.get('content') if isinstance(message, dict) else None
    elif record_type == 'queue-operation' and record.get('operation') == 'enqueue':
        content = record.get('content')
    else:
        return ''

    if not isinstance(content, str) or not content or content == 'Warmup':
        return ''
    if any(marker in content for marker in _NON_PROMPT_MARKERS):
        return ''
    return content


def extract_user_prompts_from_jsonl(jsonl_file: Path) -> List[Tuple[datetime, str]]:
    """Extract user prompts with timestamps from a JSONL file.

    Every non-blank line is parsed as one JSON record. A record contributes a
    prompt when it carries prompt text (see ``_prompt_text``) and has a
    non-empty ``timestamp`` field. Problems are reported on stdout rather than
    raised: a faulty line is skipped with a warning, and a file that cannot be
    read ends the scan with whatever was collected up to that point.

    Args:
        jsonl_file: Path of the JSONL file to read (decoded as UTF-8).

    Returns:
        ``(timestamp, prompt)`` tuples in the order they appear in the file.
    """
    prompts = []

    try:
        with open(jsonl_file, 'r', encoding='utf-8') as f:
            for line_num, line in enumerate(f, 1):
                line = line.strip()
                if not line:
                    continue

                try:
                    record = json.loads(line)

                    content = _prompt_text(record)
                    if not content:
                        continue

                    timestamp_str = record.get('timestamp')
                    if not timestamp_str:
                        continue

                    prompts.append((parse_timestamp(timestamp_str), content))

                except json.JSONDecodeError as e:
                    print(f"  Warning: Skipping malformed JSON at line {line_num} in {jsonl_file.name}: {e}")
                except Exception as e:
                    # Deliberately broad: one unusable record (e.g. a
                    # timestamp that does not parse) must not abort the file.
                    print(f"  Warning: Error processing line {line_num} in {jsonl_file.name}: {e}")

    except (OSError, UnicodeError) as e:
        # Raised by open() or by decoding while iterating over the file.
        print(f"  Error reading {jsonl_file}: {e}")

    return prompts


def process_project(project_dir: Path, output_dir: Path) -> int:
    """Collect the prompts of one project and write them to a text file.

    Reads every ``*.jsonl`` file directly inside ``project_dir``, merges the
    extracted prompts, sorts them by timestamp and writes one
    ``YYYY-MM-DD HH:MM:SS - prompt`` entry per prompt to
    ``<output_dir>/<project_dir.name>.txt``. Progress is printed to stdout.

    Args:
        project_dir: Project directory containing the JSONL files.
        output_dir: Directory in which the output text file is created.

    Returns:
        The number of prompts written, or 0 when there were no JSONL files,
        no prompts, or the output file could not be written.
    """
    print(f"\nProcessing project: {project_dir.name}")

    # glob() yields entries in arbitrary order. Sorting makes the progress log
    # reproducible and, because the sort below is stable, also fixes the
    # relative order of prompts that share a timestamp.
    jsonl_files = sorted(project_dir.glob('*.jsonl'))

    if not jsonl_files:
        print(f"  No JSONL files found in {project_dir.name}")
        return 0

    print(f"  Found {len(jsonl_files)} JSONL file(s)")

    # Collect all user prompts from all JSONL files
    all_prompts = []
    for jsonl_file in jsonl_files:
        prompts = extract_user_prompts_from_jsonl(jsonl_file)
        all_prompts.extend(prompts)
        print(f"  Extracted {len(prompts)} prompt(s) from {jsonl_file.name}")

    if not all_prompts:
        print(f"  No user prompts found in {project_dir.name}")
        return 0

    # Chronological order across all files of the project
    all_prompts.sort(key=lambda entry: entry[0])

    # Use full directory name as filename
    output_file = output_dir / f'{project_dir.name}.txt'
    try:
        with open(output_file, 'w', encoding='utf-8') as f:
            for timestamp, content in all_prompts:
                # Format: YYYY-MM-DD HH:MM:SS - prompt
                formatted_time = timestamp.strftime('%Y-%m-%d %H:%M:%S')
                f.write(f"{formatted_time} - {content}\n")

        print(f"  ✓ Created {output_file.name} with {len(all_prompts)} prompt(s)")
        return len(all_prompts)

    except (OSError, UnicodeError) as e:
        # OSError: the file cannot be created or written.
        # UnicodeError: text that cannot be encoded (e.g. lone surrogates).
        print(f"  Error writing to {output_file}: {e}")
        return 0


def main():
    """Extract the prompts of every Claude Code project under the home directory.

    Looks for project directories (names starting with ``-``) inside
    ``~/.claude/projects``, processes each one in sorted order with
    ``process_project`` and prints a summary. Output files are written to the
    current working directory. Returns ``None`` in every case; a missing
    projects directory or an empty project list is reported on stdout.
    """
    print("Claude Code Project Prompt Extractor")
    print("=" * 50)

    # Get the projects directory
    projects_dir = Path.home() / '.claude' / 'projects'

    # is_dir() rather than exists(): a regular file at this path would pass an
    # exists() check and then make iterdir() raise NotADirectoryError.
    if not projects_dir.is_dir():
        print(f"Error: Projects directory not found at {projects_dir}")
        return

    print(f"Projects directory: {projects_dir}")

    # Output to current working directory
    output_dir = Path.cwd()
    print(f"Output directory: {output_dir}")

    # Find all project directories (those starting with -), in sorted order
    project_dirs = sorted(
        entry for entry in projects_dir.iterdir()
        if entry.is_dir() and entry.name.startswith('-')
    )

    if not project_dirs:
        print("No project directories found")
        return

    print(f"Found {len(project_dirs)} project(s)")

    # Process each project and add up the per-project prompt counts
    total_prompts = sum(
        process_project(project_dir, output_dir) for project_dir in project_dirs
    )

    # Summary
    print("\n" + "=" * 50)
    print(f"Summary: Extracted {total_prompts} total prompt(s) from {len(project_dirs)} project(s)")
    print(f"Files saved to: {output_dir}")
    print("=" * 50)

# Run the extractor only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()
	#!/usr/bin/env python3
	"""
	Extract user prompts from Claude Code project files.

	This script walks through the ~/.claude/projects/ directory, parses the JSONL
	files found directly inside each project directory, extracts user messages with
	their timestamps, and saves them to text files in the current working
	directory. Each output file is named after its project directory.
	"""

	import json
	from pathlib import Path
	from datetime import datetime
	from typing import List, Tuple


	def parse_timestamp(iso_timestamp: str) -> datetime:
	    """Parse an ISO 8601 timestamp string into a datetime object.

	    A trailing ``Z``/``z`` UTC designator is rewritten to ``+00:00`` before
	    parsing, because ``datetime.fromisoformat`` only started accepting the
	    ``Z`` suffix in Python 3.11.

	    Args:
	        iso_timestamp: Timestamp text such as ``2025-01-02T03:04:05.123Z``.

	    Returns:
	        The parsed datetime. It is timezone-aware when the input carries a
	        UTC designator or an explicit offset, and naive otherwise.

	    Raises:
	        ValueError: If ``datetime.fromisoformat`` cannot parse the text.
	    """
	    # fromisoformat() handles an optional fractional-seconds part on its own,
	    # so timestamps with and without microseconds share a single code path.
	    if iso_timestamp.endswith(('Z', 'z')):
	        iso_timestamp = iso_timestamp[:-1] + '+00:00'
	    return datetime.fromisoformat(iso_timestamp)


	# Substrings that mark content as something other than a typed prompt:
	# XML-like tags (command messages, notifications, etc.) and system messages.
	# Defined once here instead of being rebuilt for every line of every file.
	_NON_PROMPT_MARKERS = (
	    '<command-name>',
	    '<command-message>',
	    '<command-args>',
	    '<local-command-stdout>',
	    '<local-command-stderr>',
	    '<local-command-caveat>',
	    '<bash-notification>',
	    '<shell-id>',
	    '<output-file>',
	    '<status>',
	    '<summary>',
	    'Caveat: The messages below were generated by the user while running local commands',
	    'This session is being continued from a previous conversation',
	)


	def _prompt_text(record) -> str:
	    """Return the prompt text carried by one parsed JSONL record, or ''.

	    Two record shapes carry prompts: ``type == 'user'`` (text under
	    ``message.content``; sidechain records are excluded) and
	    ``type == 'queue-operation'`` with ``operation == 'enqueue'`` (text under
	    ``content``). Non-string or empty content, the literal ``'Warmup'``, and
	    text containing any ``_NON_PROMPT_MARKERS`` entry are rejected.
	    """
	    # A JSON line may decode to a list, number, string or null; only objects
	    # can describe a prompt.
	    if not isinstance(record, dict):
	        return ''

	    record_type = record.get('type')
	    if record_type == 'user':
	        if record.get('isSidechain', False):
	            return ''
	        # "message" may be present but null, so check the type rather than
	        # relying on a .get() default.
	        message = record.get('message')
	        content = message.get('content') if isinstance(message, dict) else None
	    elif record_type == 'queue-operation' and record.get('operation') == 'enqueue':
	        content = record.get('content')
	    else:
	        return ''

	    if not isinstance(content, str) or not content or content == 'Warmup':
	        return ''
	    if any(marker in content for marker in _NON_PROMPT_MARKERS):
	        return ''
	    return content


	def extract_user_prompts_from_jsonl(jsonl_file: Path) -> List[Tuple[datetime, str]]:
	    """Extract user prompts with timestamps from a JSONL file.

	    Every non-blank line is parsed as one JSON record. A record contributes a
	    prompt when it carries prompt text (see ``_prompt_text``) and has a
	    non-empty ``timestamp`` field. Problems are reported on stdout rather than
	    raised: a faulty line is skipped with a warning, and a file that cannot be
	    read ends the scan with whatever was collected up to that point.

	    Args:
	        jsonl_file: Path of the JSONL file to read (decoded as UTF-8).

	    Returns:
	        ``(timestamp, prompt)`` tuples in the order they appear in the file.
	    """
	    prompts = []

	    try:
	        with open(jsonl_file, 'r', encoding='utf-8') as f:
	            for line_num, line in enumerate(f, 1):
	                line = line.strip()
	                if not line:
	                    continue

	                try:
	                    record = json.loads(line)

	                    content = _prompt_text(record)
	                    if not content:
	                        continue

	                    timestamp_str = record.get('timestamp')
	                    if not timestamp_str:
	                        continue

	                    prompts.append((parse_timestamp(timestamp_str), content))

	                except json.JSONDecodeError as e:
	                    print(f" Warning: Skipping malformed JSON at line {line_num} in {jsonl_file.name}: {e}")
	                except Exception as e:
	                    # Deliberately broad: one unusable record (e.g. a
	                    # timestamp that does not parse) must not abort the file.
	                    print(f" Warning: Error processing line {line_num} in {jsonl_file.name}: {e}")

	    except (OSError, UnicodeError) as e:
	        # Raised by open() or by decoding while iterating over the file.
	        print(f" Error reading {jsonl_file}: {e}")

	    return prompts


	def process_project(project_dir: Path, output_dir: Path) -> int:
	    """Collect the prompts of one project and write them to a text file.

	    Reads every ``*.jsonl`` file directly inside ``project_dir``, merges the
	    extracted prompts, sorts them by timestamp and writes one
	    ``YYYY-MM-DD HH:MM:SS - prompt`` entry per prompt to
	    ``<output_dir>/<project_dir.name>.txt``. Progress is printed to stdout.

	    Args:
	        project_dir: Project directory containing the JSONL files.
	        output_dir: Directory in which the output text file is created.

	    Returns:
	        The number of prompts written, or 0 when there were no JSONL files,
	        no prompts, or the output file could not be written.
	    """
	    print(f"\nProcessing project: {project_dir.name}")

	    # glob() yields entries in arbitrary order. Sorting makes the progress log
	    # reproducible and, because the sort below is stable, also fixes the
	    # relative order of prompts that share a timestamp.
	    jsonl_files = sorted(project_dir.glob('*.jsonl'))

	    if not jsonl_files:
	        print(f" No JSONL files found in {project_dir.name}")
	        return 0

	    print(f" Found {len(jsonl_files)} JSONL file(s)")

	    # Collect all user prompts from all JSONL files
	    all_prompts = []
	    for jsonl_file in jsonl_files:
	        prompts = extract_user_prompts_from_jsonl(jsonl_file)
	        all_prompts.extend(prompts)
	        print(f" Extracted {len(prompts)} prompt(s) from {jsonl_file.name}")

	    if not all_prompts:
	        print(f" No user prompts found in {project_dir.name}")
	        return 0

	    # Chronological order across all files of the project
	    all_prompts.sort(key=lambda entry: entry[0])

	    # Use full directory name as filename
	    output_file = output_dir / f'{project_dir.name}.txt'
	    try:
	        with open(output_file, 'w', encoding='utf-8') as f:
	            for timestamp, content in all_prompts:
	                # Format: YYYY-MM-DD HH:MM:SS - prompt
	                formatted_time = timestamp.strftime('%Y-%m-%d %H:%M:%S')
	                f.write(f"{formatted_time} - {content}\n")

	        print(f" ✓ Created {output_file.name} with {len(all_prompts)} prompt(s)")
	        return len(all_prompts)

	    except (OSError, UnicodeError) as e:
	        # OSError: the file cannot be created or written.
	        # UnicodeError: text that cannot be encoded (e.g. lone surrogates).
	        print(f" Error writing to {output_file}: {e}")
	        return 0


	def main():
	    """Extract the prompts of every Claude Code project under the home directory.

	    Looks for project directories (names starting with ``-``) inside
	    ``~/.claude/projects``, processes each one in sorted order with
	    ``process_project`` and prints a summary. Output files are written to the
	    current working directory. Returns ``None`` in every case; a missing
	    projects directory or an empty project list is reported on stdout.
	    """
	    print("Claude Code Project Prompt Extractor")
	    print("=" * 50)

	    # Get the projects directory
	    projects_dir = Path.home() / '.claude' / 'projects'

	    # is_dir() rather than exists(): a regular file at this path would pass an
	    # exists() check and then make iterdir() raise NotADirectoryError.
	    if not projects_dir.is_dir():
	        print(f"Error: Projects directory not found at {projects_dir}")
	        return

	    print(f"Projects directory: {projects_dir}")

	    # Output to current working directory
	    output_dir = Path.cwd()
	    print(f"Output directory: {output_dir}")

	    # Find all project directories (those starting with -), in sorted order
	    project_dirs = sorted(
	        entry for entry in projects_dir.iterdir()
	        if entry.is_dir() and entry.name.startswith('-')
	    )

	    if not project_dirs:
	        print("No project directories found")
	        return

	    print(f"Found {len(project_dirs)} project(s)")

	    # Process each project and add up the per-project prompt counts
	    total_prompts = sum(
	        process_project(project_dir, output_dir) for project_dir in project_dirs
	    )

	    # Summary
	    print("\n" + "=" * 50)
	    print(f"Summary: Extracted {total_prompts} total prompt(s) from {len(project_dirs)} project(s)")
	    print(f"Files saved to: {output_dir}")
	    print("=" * 50)


	# Run the extractor only when this file is executed as a script, not on import.
	if __name__ == '__main__':
	    main()
No results found