josmithiii/README.md

## README.md

      
    Raw
  

              README.md
            
          
    voice

Voice-to-text transcription using OpenAI Whisper, with integrations for AI coding assistants and Emacs.
## Requirements

- openai-whisper: `pip install openai-whisper`
- sox: `brew install sox` (for the `rec` command)
- ffmpeg: `brew install ffmpeg`

## Installation

```sh
curl -o ~/.local/bin/voice https://gist.githubusercontent.com/josmithiii/607f6090dd9599763d6cc109280abe63/raw/voice
chmod +x ~/.local/bin/voice
```

## Usage

voice                 # Record and print transcription
voice -c              # Record and copy to clipboard
voice 10              # Record for 10 seconds
voice claude          # Record and send to Claude Code
voice -l -c           # Loop mode with clipboard (Ctrl+G toggles, Ctrl+C exits)
voice -l emacs        # Loop mode inserting into Emacs at point
voice -l org          # Loop mode capturing to org-mode
Targets


| Target | Description |
|--------|-------------|
| claude | Send to Claude Code |
| codex  | Send to Codex |
| gemini | Send to Gemini |
| aider  | Send to Aider |
| gpt    | Send to GPT |
| emacs  | Insert at point (requires server) |
| org    | Capture as org-mode voice note |


Environment Variables


WHISPER_MODEL: Whisper model to use (default: base). Options: tiny, base, small, medium, large
WHISPER_LANG: Language code (default: en)

Loop Mode Controls


Ctrl+G: Toggle recording on/off
Ctrl+C: Exit

Emacs Integration

Add to your init file:
(setq server-socket-dir "~/.emacs.d/server")
(unless (display-graphic-p)
  (server-start))
For org-capture support, add:
(setq org-capture-templates
      '(("v" "Voice note" entry (file+headline "~/org/voice.org" "Voice Notes")
         "* %U\n%i%?\n" :empty-lines 1)))

  
## voice
#!/bin/bash
# Voice-to-text using Whisper, with integrations for AI assistants and Emacs

# Write the complete help text for the voice command to stdout.
# The heredoc delimiter is quoted, so the text below is emitted literally
# (no variable or command expansion takes place inside it).
print_help() {
    cat <<'HELP_TEXT'
voice - Voice-to-text using Whisper

USAGE:
    voice [OPTIONS] [DURATION] [TARGET]

OPTIONS:
    -h, --help       Show this help message
    -c, --clipboard  Copy transcription to clipboard
    -l, --loop       Continuous mode (Ctrl+G toggles recording, Ctrl+C exits)

CONTROLS:
    Single-shot: Ctrl+G stop and transcribe, Ctrl+C abort
    Loop mode:   Ctrl+G toggle recording, Ctrl+C exit
    Note: loop mode requires a TTY (interactive terminal)

DURATION:
    Number of seconds to record (optional, default: until Ctrl+G)

TARGETS:
    claude    Send to Claude Code
    codex     Send to Codex
    gemini    Send to Gemini
    aider     Send to Aider
    gpt       Send to GPT
    emacs     Insert at point in Emacs (requires server)
    org       Capture as org-mode voice note
    log       Append to log file (see VOICE_LOG_FILE)
    stdout    Force stdout output (same as default)

EXAMPLES:
    voice                 Record and print transcription
    voice -c              Record and copy to clipboard
    voice 10              Record 10 seconds, print transcription
    voice claude          Record and send to Claude Code
    voice -l -c           Loop mode: Ctrl+G to record, copies to clipboard
    voice -l emacs        Loop mode: insert each recording into Emacs

ENVIRONMENT:
    WHISPER_MODEL   Whisper model (default: base)
    WHISPER_LANG    Language code (default: en)
    VOICE_LOG_FILE  Log file path (default: /tmp/voice.log)
    VOICE_EMACS_VERBOSE  Show emacsclient output (set to 1)

HELP_TEXT
}

# Print the help text on stdout, then end the script with exit status 0.
show_help() { print_help; exit 0; }

# Report an invalid invocation and abort.
# Arguments: $1 - description of what was wrong with the command line
# Outputs:   "Error: <description>" followed by the help text, all on stderr
# Exits the script with status 1.
usage_error() {
    {
        printf '%s\n' "Error: $1"
        print_help
    } >&2
    exit 1
}

# Settings taken from the environment, with their defaults.
MODEL="${WHISPER_MODEL:-base}"
# NOTE(review): LANG is also the standard locale variable, so this assignment
# changes the locale seen by this shell and by every child process. Renaming
# it requires updating every reader of $LANG in this script at the same time.
LANG="${WHISPER_LANG:-en}"
RESULT_FILE="/tmp/voice_result_${$}.txt"
LOG_FILE="${VOICE_LOG_FILE:-/tmp/voice.log}"
EMACS_VERBOSE="${VOICE_EMACS_VERBOSE:-0}"

# Parse args
DURATION=""
TARGET=""
CLIPBOARD=false
LOOP=false
unknown_args=()

# No arguments: show brief usage
if [ $# -eq 0 ]; then
    echo "voice - Voice-to-text using Whisper. Use --help for options." >&2
    echo "" >&2
fi

# Interpret the command-line words and record the result in globals.
# Globals:   DURATION, TARGET, CLIPBOARD, LOOP, unknown_args (all written)
# Arguments: the script's command-line words
# Behaviour: -h/--help prints the help and exits 0 via show_help; any word
#            that is not an option, a target or a well-formed duration is
#            collected and reported through usage_error (exit status 1).
# A duration must look like a time value: seconds with an optional fraction
# ("10", "1.5") or a colon form ("1:30"). Previously any word that merely
# started with a digit (e.g. "10abc") was accepted and only failed later,
# silently, when it was handed to rec.
parse_args() {
    local arg
    # Kept in a variable so the regex is not affected by quoting rules.
    local duration_re='^[0-9]+(:[0-9]+){0,2}([.][0-9]+)?$'
    unknown_args=()

    for arg in "$@"; do
        case "$arg" in
            -h|--help) show_help ;;
            -c|--clipboard) CLIPBOARD=true ;;
            -l|--loop) LOOP=true ;;
            claude|codex|gemini|aider|gpt|emacs|org|log|stdout) TARGET="$arg" ;;
            [0-9]*)
                if [[ "$arg" =~ $duration_re ]]; then
                    DURATION="$arg"
                else
                    unknown_args+=("$arg")
                fi
                ;;
            *) unknown_args+=("$arg") ;;
        esac
    done

    if [ ${#unknown_args[@]} -gt 0 ]; then
        usage_error "Unknown argument(s): ${unknown_args[*]}"
    fi
}

parse_args "$@"

# Evaluate an Emacs Lisp expression through emacsclient.
# Globals:   EMACS_VERBOSE (read) - "1" lets emacsclient's stdout through,
#            any other value discards it
# Arguments: $1 - the Lisp expression passed to `emacsclient --eval`
# Returns:   the exit status of emacsclient
run_emacsclient() {
    case "$EMACS_VERBOSE" in
        1) emacsclient --eval "$1" ;;
        *) emacsclient --eval "$1" >/dev/null ;;
    esac
}

# Transcribe a recorded audio file with the whisper command-line tool.
# Globals:   MODEL (read)        - Whisper model name
#            WHISPER_LANG (read) - language code, "en" when unset or empty
#            RESULT_FILE (written) - receives a copy of the transcript
# Arguments: $1 - path of the audio file; it is deleted once processed
# Outputs:   the transcript as one line on stdout; "Transcribing..." on stderr
# Returns:   0 when whisper produced a transcript file, 1 when the audio
#            file is missing or empty or no transcript file appeared
transcribe() {
    local tmpwav="$1"
    local resultfile="$RESULT_FILE"
    local outdir wavname tmptxt result

    # Whisper names its output after the audio file (extension replaced by
    # .txt) inside --output_dir, so derive both from the argument instead of
    # assuming /tmp/voice_$$.wav. For that path the result is unchanged.
    case "$tmpwav" in
        */*) outdir="${tmpwav%/*}" ;;
        *)   outdir="." ;;
    esac
    [ -n "$outdir" ] || outdir="/"
    wavname="${tmpwav##*/}"
    tmptxt="$outdir/${wavname%.*}.txt"

    # Drop leftovers from an earlier run so a stale transcript is never reused.
    rm -f "$tmptxt" "$resultfile"

    # Check if we got any audio
    if [ ! -f "$tmpwav" ] || [ ! -s "$tmpwav" ]; then
        return 1
    fi

    echo "Transcribing..." >&2
    # The language is read from WHISPER_LANG directly so this function does
    # not depend on the LANG shell variable, which doubles as the locale.
    whisper "$tmpwav" --model "$MODEL" --language "${WHISPER_LANG:-en}" \
        --output_format txt --output_dir "$outdir" >/dev/null 2>&1

    if [ -f "$tmptxt" ]; then
        # Drop lines starting with "[", join the rest into one line and
        # squeeze/trim the spaces.
        result=$(grep -v '^\[' "$tmptxt" | tr '\n' ' ' | sed 's/  */ /g; s/^ //; s/ $//')
        rm -f "$tmpwav" "$tmptxt"
        # printf rather than echo: a transcript such as "-n" must be emitted
        # as text, not interpreted as an option.
        printf '%s\n' "$result" > "$resultfile"  # Save for retrieval
        printf '%s\n' "$result"                  # For piping (may be lost after signal)
        return 0
    else
        rm -f "$tmpwav"
        return 1
    fi
}

# Record audio with `rec` into /tmp/voice_$$.wav, wait until the recorder
# exits, then hand the file to transcribe.
# Globals:   DURATION (read) - when non-empty the recording is limited with
#            `trim 0 $DURATION`; otherwise rec runs until it is stopped
# Outputs:   status messages on stderr; stdout is whatever transcribe prints
# Returns:   the exit status of transcribe
record_and_transcribe() {
    local tmpwav="/tmp/voice_$$.wav"
    rm -f "$tmpwav"

    # Remap Ctrl+G to SIGQUIT for stopping (avoids bash SIGINT special handling)
    local old_stty=""
    local stty_enabled=false
    # Terminal settings are only saved and changed when stdin is a terminal.
    if [ -t 0 ]; then
        old_stty=$(stty -g 2>/dev/null)
        stty quit '^G'
        stty_enabled=true
    fi

    echo "" >&2
    echo "🎤 Recording... (Ctrl+G to stop)" >&2

    # Run rec in background
    if [ -n "$DURATION" ]; then
        rec -r 16000 -c 1 "$tmpwav" trim 0 "$DURATION" 2>/dev/null &
    else
        rec -r 16000 -c 1 "$tmpwav" 2>/dev/null &
    fi
    local rec_pid=$!

    # Trap SIGQUIT to stop recording gracefully
    # The handler is single-quoted, so $rec_pid is expanded when the signal
    # arrives, not when the trap is installed.
    trap 'kill -INT $rec_pid 2>/dev/null' QUIT
    # Poll until the recorder process is gone; if fractional sleep fails,
    # fall back to one-second steps.
    while kill -0 $rec_pid 2>/dev/null; do
        sleep 0.1 2>/dev/null || sleep 1
    done
    # Back to the default SIGQUIT disposition before transcribing.
    trap - QUIT
    # Put the terminal back the way it was found (only if it was changed
    # and the saved settings are non-empty).
    if $stty_enabled && [ -n "$old_stty" ]; then
        stty "$old_stty" 2>/dev/null
    fi
    echo "" >&2

    transcribe "$tmpwav"
}

# Escape a string so it can sit inside a double-quoted Emacs Lisp string
# literal: backslashes are doubled and double quotes are backslash-escaped.
_voice_elisp_escape() {
    printf '%s' "$1" | sed 's/\\/\\\\/g; s/"/\\"/g'
}

# Deliver a transcript to the destination named by TARGET.
# Globals:   TARGET (read), LOG_FILE (read, "log" target only)
# Arguments: $1 - the transcript text
# Outputs:   ">>> <text>" on stderr, plus whatever the target command prints
# Returns:   the status of the target command; 1 for an unrecognised TARGET
#            (previously an unknown target was a silent no-op)
send_to_target() {
    local text="$1"
    local quoted
    printf '%s\n' ">>> $text" >&2
    case "$TARGET" in
        claude) ~/.claude/local/claude -p "$text" --print ;;
        codex)  codex "$text" ;;
        gemini) gemini "$text" ;;
        aider)  aider --message "$text" ;;
        gpt)    gpt "$text" ;;
        emacs)
            quoted=$(_voice_elisp_escape "$text")
            run_emacsclient "(insert \"$quoted\")"
            ;;
        org)
            quoted=$(_voice_elisp_escape "$text")
            run_emacsclient "(progn (org-capture nil \"v\") (insert \"$quoted\") (org-capture-finalize))"
            ;;
        # printf instead of echo: a transcript like "-n" or "-e" would be
        # swallowed by echo as an option.
        log)    printf '%s\n' "$text" >> "$LOG_FILE" ;;
        stdout) printf '%s\n' "$text" ;;
        *)
            printf '%s\n' "Error: unknown target '$TARGET'" >&2
            return 1
            ;;
    esac
}

# Route a transcript to its destination: the clipboard when CLIPBOARD is
# true, otherwise the selected TARGET, otherwise stdout.
# Globals:   CLIPBOARD (read), TARGET (read)
# Arguments: $1 - the transcript text; an empty string is ignored
# Outputs:   the text on stdout (default route) or a status line on stderr
# Returns:   0 on success or empty input; 1 when the clipboard copy fails;
#            otherwise the status of send_to_target
handle_text() {
    local text="$1"
    [ -n "$text" ] || return 0

    if $CLIPBOARD; then
        # Only claim success when pbcopy actually succeeded. Before, "Copied"
        # was printed even if pbcopy was missing or failed.
        if printf '%s\n' "$text" | pbcopy; then
            printf '%s\n' "Copied: $text" >&2
        else
            echo "Error: could not copy to clipboard (pbcopy failed)" >&2
            return 1
        fi
    elif [ -n "$TARGET" ]; then
        send_to_target "$text"
    else
        printf '%s\n' "$text"
    fi
}

# Single-shot mode: record once, transcribe, deliver the text, exit.
if ! $LOOP; then
    # Trap SIGQUIT in parent to prevent exit during subshell capture
    trap '' QUIT
    TEXT=$(record_and_transcribe)
    trap - QUIT
    # If stdout capture failed (signal case), read from result file
    if [ -z "$TEXT" ] && [ -f "$RESULT_FILE" ]; then
        TEXT=$(cat "$RESULT_FILE")
    fi
    # The result file is written on every successful transcription, so remove
    # it unconditionally. Before, it was only removed on the fallback path
    # and a transcript was left behind in /tmp after each normal run.
    rm -f "$RESULT_FILE"
    if [ -z "$TEXT" ]; then
        echo "Transcription failed" >&2
        exit 1
    fi
    handle_text "$TEXT"
    # Propagate the delivery status instead of always reporting success.
    exit $?
fi

# Loop mode with Ctrl+G toggle
# State shared by the loop-mode handlers below.
OLD_STTY=""       # saved terminal settings, empty until they are captured
RECORDING=false   # whether a recording is currently in progress
REC_PID=""        # PID of the running recorder, empty when idle
TMPWAV="/tmp/voice_$$.wav"

# Exit handler for loop mode: stop a recorder that is still running, delete
# this process's temporary files and restore the saved terminal settings.
cleanup() {
    local leftover
    if [ -n "$REC_PID" ]; then
        kill "$REC_PID" 2>/dev/null
    fi
    for leftover in "$TMPWAV" "/tmp/voice_$$.txt" "$RESULT_FILE"; do
        rm -f "$leftover"
    done
    # Restore terminal settings
    [ -n "$OLD_STTY" ] && stty "$OLD_STTY" 2>/dev/null
}
trap cleanup EXIT

# Loop mode is only started when stdin is a terminal; otherwise report the
# problem and stop.
[ -t 0 ] || {
    echo "Loop mode requires a TTY." >&2
    exit 1
}

# Save terminal settings and remap Ctrl+G to SIGQUIT
OLD_STTY=$(stty -g)
stty quit '^G'

# Banner and initial prompt, all on stderr.
{
    echo "Voice loop mode. Ctrl+G to start/stop recording, Ctrl+C to exit."
    echo ""
    echo "⏸  Ready. Press Ctrl+G to start recording..."
} >&2

# SIGQUIT (Ctrl+G) handler for loop mode: start a recording when idle, or
# stop the current one, transcribe it and deliver the text.
# Globals:   RECORDING, REC_PID (read and written), TMPWAV (read)
# Outputs:   status prompts on stderr; delivery output comes from handle_text
toggle_recording() {
    local text
    if $RECORDING; then
        # Stop recording
        RECORDING=false
        if [ -n "$REC_PID" ]; then
            kill "$REC_PID" 2>/dev/null
            # Wait for the recorder to exit before the file is read.
            wait "$REC_PID" 2>/dev/null
            REC_PID=""
        fi
        # Transcribe and handle
        text=$(transcribe "$TMPWAV")
        if [ -n "$text" ]; then
            handle_text "$text"
        else
            # Same message single-shot mode prints. Before, loop mode gave
            # no feedback at all when nothing could be transcribed.
            echo "Transcription failed" >&2
        fi
        echo "" >&2
        echo "⏸  Ready. Press Ctrl+G to start recording..." >&2
    else
        # Start recording
        RECORDING=true
        rm -f "$TMPWAV"
        echo "" >&2
        echo "🎤 Recording... (Ctrl+G to stop)" >&2
        rec -r 16000 -c 1 "$TMPWAV" 2>/dev/null &
        REC_PID=$!
    fi
}

# Ctrl+G (delivered as SIGQUIT after the stty remap) toggles recording.
trap toggle_recording QUIT
# Ctrl+C says goodbye and exits. Both lines go to stderr so that stdout
# carries transcripts only; before, the blank line was written to stdout.
trap '{ echo ""; echo "Goodbye!"; } >&2; exit 0' INT

# Wait forever, signals do the work
while true; do
    sleep 1
done
Target	Description
claude	Send to Claude Code
codex	Send to Codex
gemini	Send to Gemini
aider	Send to Aider
gpt	Send to GPT
emacs	Insert at point (requires server)
org	Capture as org-mode voice note
	#!/bin/bash
	# Voice-to-text using Whisper, with integrations for AI assistants and Emacs

	print_help() {
	cat <<'EOF'
	voice - Voice-to-text using Whisper

	USAGE:
	voice [OPTIONS] [DURATION] [TARGET]

	OPTIONS:
	-h, --help Show this help message
	-c, --clipboard Copy transcription to clipboard
	-l, --loop Continuous mode (Ctrl+G toggles recording, Ctrl+C exits)

	CONTROLS:
	Single-shot: Ctrl+G stop and transcribe, Ctrl+C abort
	Loop mode: Ctrl+G toggle recording, Ctrl+C exit
	Note: loop mode requires a TTY (interactive terminal)

	DURATION:
	Number of seconds to record (optional, default: until Ctrl+G)

	TARGETS:
	claude Send to Claude Code
	codex Send to Codex
	gemini Send to Gemini
	aider Send to Aider
	gpt Send to GPT
	emacs Insert at point in Emacs (requires server)
	org Capture as org-mode voice note
	log Append to log file (see VOICE_LOG_FILE)
	stdout Force stdout output (same as default)

	EXAMPLES:
	voice Record and print transcription
	voice -c Record and copy to clipboard
	voice 10 Record 10 seconds, print transcription
	voice claude Record and send to Claude Code
	voice -l -c Loop mode: Ctrl+G to record, copies to clipboard
	voice -l emacs Loop mode: insert each recording into Emacs

	ENVIRONMENT:
	WHISPER_MODEL Whisper model (default: base)
	WHISPER_LANG Language code (default: en)
	VOICE_LOG_FILE Log file path (default: /tmp/voice.log)
	VOICE_EMACS_VERBOSE Show emacsclient output (set to 1)

	EOF
	}

	show_help() {
	print_help
	exit 0
	}

	usage_error() {
	echo "Error: $1" >&2
	print_help >&2
	exit 1
	}

	MODEL="${WHISPER_MODEL:-base}"
	LANG="${WHISPER_LANG:-en}"
	RESULT_FILE="/tmp/voice_result_${$}.txt"
	LOG_FILE="${VOICE_LOG_FILE:-/tmp/voice.log}"
	EMACS_VERBOSE="${VOICE_EMACS_VERBOSE:-0}"

	# Parse args
	DURATION=""
	TARGET=""
	CLIPBOARD=false
	LOOP=false
	unknown_args=()

	# No arguments: show brief usage
	if [ $# -eq 0 ]; then
	echo "voice - Voice-to-text using Whisper. Use --help for options." >&2
	echo "" >&2
	fi

	for arg in "$@"; do
	case "$arg" in
	-h\|--help) show_help ;;
	[0-9]*) DURATION="$arg" ;;
	-c\|--clipboard) CLIPBOARD=true ;;
	-l\|--loop) LOOP=true ;;
	claude\|codex\|gemini\|aider\|gpt\|emacs\|org\|log\|stdout) TARGET="$arg" ;;
	*) unknown_args+=("$arg") ;;
	esac
	done

	if [ ${#unknown_args[@]} -gt 0 ]; then
	usage_error "Unknown argument(s): ${unknown_args[*]}"
	fi

	run_emacsclient() {
	if [ "$EMACS_VERBOSE" = "1" ]; then
	emacsclient --eval "$1"
	else
	emacsclient --eval "$1" >/dev/null
	fi
	}

	transcribe() {
	local tmpwav="$1"
	local tmptxt="/tmp/voice_$$.txt"
	local resultfile="$RESULT_FILE"
	rm -f "$tmptxt" "$resultfile"

	# Check if we got any audio
	if [ ! -f "$tmpwav" ] \|\| [ ! -s "$tmpwav" ]; then
	return 1
	fi

	echo "Transcribing..." >&2
	whisper "$tmpwav" --model "$MODEL" --language "$LANG" --output_format txt --output_dir /tmp >/dev/null 2>&1

	if [ -f "$tmptxt" ]; then
	local result
	result=$(grep -v '^\[' "$tmptxt" \| tr '\n' ' ' \| sed 's/ */ /g; s/^ //; s/ $//')
	rm -f "$tmpwav" "$tmptxt"
	echo "$result" > "$resultfile" # Save for retrieval
	echo "$result" # For piping (may be lost after signal)
	return 0
	else
	rm -f "$tmpwav"
	return 1
	fi
	}

	record_and_transcribe() {
	local tmpwav="/tmp/voice_$$.wav"
	rm -f "$tmpwav"

	# Remap Ctrl+G to SIGQUIT for stopping (avoids bash SIGINT special handling)
	local old_stty=""
	local stty_enabled=false
	if [ -t 0 ]; then
	old_stty=$(stty -g 2>/dev/null)
	stty quit '^G'
	stty_enabled=true
	fi

	echo "" >&2
	echo "🎤 Recording... (Ctrl+G to stop)" >&2

	# Run rec in background
	if [ -n "$DURATION" ]; then
	rec -r 16000 -c 1 "$tmpwav" trim 0 "$DURATION" 2>/dev/null &
	else
	rec -r 16000 -c 1 "$tmpwav" 2>/dev/null &
	fi
	local rec_pid=$!

	# Trap SIGQUIT to stop recording gracefully
	trap 'kill -INT $rec_pid 2>/dev/null' QUIT
	while kill -0 $rec_pid 2>/dev/null; do
	sleep 0.1 2>/dev/null \|\| sleep 1
	done
	trap - QUIT
	if $stty_enabled && [ -n "$old_stty" ]; then
	stty "$old_stty" 2>/dev/null
	fi
	echo "" >&2

	transcribe "$tmpwav"
	}

	send_to_target() {
	local text="$1"
	echo ">>> $text" >&2
	case "$TARGET" in
	claude) ~/.claude/local/claude -p "$text" --print ;;
	codex) codex "$text" ;;
	gemini) gemini "$text" ;;
	aider) aider --message "$text" ;;
	gpt) gpt "$text" ;;
	emacs) run_emacsclient "(insert \"$(printf '%s' "$text" \| sed 's/\\/\\\\/g; s/"/\\"/g')\")" ;;
	org) run_emacsclient "(progn (org-capture nil \"v\") (insert \"$(printf '%s' "$text" \| sed 's/\\/\\\\/g; s/"/\\"/g')\") (org-capture-finalize))" ;;
	log) echo "$text" >> "$LOG_FILE" ;;
	stdout) echo "$text" ;;
	esac
	}

	handle_text() {
	local TEXT="$1"
	if [ -z "$TEXT" ]; then
	return
	fi

	if $CLIPBOARD; then
	echo "$TEXT" \| pbcopy
	echo "Copied: $TEXT" >&2
	elif [ -n "$TARGET" ]; then
	send_to_target "$TEXT"
	else
	echo "$TEXT"
	fi
	}

	# Single-shot mode
	if ! $LOOP; then
	# Trap SIGQUIT in parent to prevent exit during subshell capture
	trap '' QUIT
	TEXT=$(record_and_transcribe)
	trap - QUIT
	# If stdout capture failed (signal case), read from result file
	if [ -z "$TEXT" ] && [ -f "$RESULT_FILE" ]; then
	TEXT=$(cat "$RESULT_FILE")
	rm -f "$RESULT_FILE"
	fi
	if [ -z "$TEXT" ]; then
	echo "Transcription failed" >&2
	exit 1
	fi
	handle_text "$TEXT"
	exit 0
	fi

	# Loop mode with Ctrl+G toggle
	TMPWAV="/tmp/voice_$$.wav"
	REC_PID=""
	RECORDING=false
	OLD_STTY=""

	cleanup() {
	[ -n "$REC_PID" ] && kill "$REC_PID" 2>/dev/null
	rm -f "$TMPWAV" "/tmp/voice_$$.txt" "$RESULT_FILE"
	# Restore terminal settings
	[ -n "$OLD_STTY" ] && stty "$OLD_STTY" 2>/dev/null
	}
	trap cleanup EXIT

	# Save terminal settings and remap Ctrl+G to SIGQUIT
	if ! [ -t 0 ]; then
	echo "Loop mode requires a TTY." >&2
	exit 1
	fi
	OLD_STTY=$(stty -g)
	stty quit '^G'

	echo "Voice loop mode. Ctrl+G to start/stop recording, Ctrl+C to exit." >&2
	echo "" >&2
	echo "⏸ Ready. Press Ctrl+G to start recording..." >&2

	toggle_recording() {
	if $RECORDING; then
	# Stop recording
	RECORDING=false
	if [ -n "$REC_PID" ]; then
	kill "$REC_PID" 2>/dev/null
	wait "$REC_PID" 2>/dev/null
	REC_PID=""
	fi
	# Transcribe and handle
	TEXT=$(transcribe "$TMPWAV")
	handle_text "$TEXT"
	echo "" >&2
	echo "⏸ Ready. Press Ctrl+G to start recording..." >&2
	else
	# Start recording
	RECORDING=true
	rm -f "$TMPWAV"
	echo "" >&2
	echo "🎤 Recording... (Ctrl+G to stop)" >&2
	rec -r 16000 -c 1 "$TMPWAV" 2>/dev/null &
	REC_PID=$!
	fi
	}

	trap toggle_recording QUIT
	trap 'echo ""; echo "Goodbye!" >&2; exit 0' INT

	# Wait forever, signals do the work
	while true; do
	sleep 1
	done