|
#!/bin/bash |
|
# Voice-to-text using Whisper, with integrations for AI assistants and Emacs |
|
|
|
# Print the full usage text on stdout.
# The heredoc delimiter is quoted, so nothing inside the text is expanded, and
# it sits alone on its line so the heredoc actually terminates.
print_help() {
  cat <<'EOF'
voice - Voice-to-text using Whisper

USAGE:
    voice [OPTIONS] [DURATION] [TARGET]

OPTIONS:
    -h, --help        Show this help message
    -c, --clipboard   Copy transcription to clipboard
    -l, --loop        Continuous mode (Ctrl+G toggles recording, Ctrl+C exits)

CONTROLS:
    Single-shot:  Ctrl+G stop and transcribe, Ctrl+C abort
    Loop mode:    Ctrl+G toggle recording, Ctrl+C exit
    Note: loop mode requires a TTY (interactive terminal)

DURATION:
    Number of seconds to record (optional, default: until Ctrl+G)
    Ignored in loop mode, where Ctrl+G always ends the recording

TARGETS:
    claude    Send to Claude Code
    codex     Send to Codex
    gemini    Send to Gemini
    aider     Send to Aider
    gpt       Send to GPT
    emacs     Insert at point in Emacs (requires server)
    org       Capture as org-mode voice note
    log       Append to log file (see VOICE_LOG_FILE)
    stdout    Force stdout output (same as default)

EXAMPLES:
    voice              Record and print transcription
    voice -c           Record and copy to clipboard
    voice 10           Record 10 seconds, print transcription
    voice claude       Record and send to Claude Code
    voice -l -c        Loop mode: Ctrl+G to record, copies to clipboard
    voice -l emacs     Loop mode: insert each recording into Emacs

ENVIRONMENT:
    WHISPER_MODEL         Whisper model (default: base)
    WHISPER_LANG          Language code (default: en)
    VOICE_LOG_FILE        Log file path (default: /tmp/voice.log)
    VOICE_EMACS_VERBOSE   Show emacsclient output (set to 1)

EOF
}
|
|
|
# Print the help text on stdout and end the script with status 0.
show_help() {
  print_help
  exit 0
}
|
|
|
# Report a command-line mistake and end the script with status 1.
# Arguments: $1 - message describing what was wrong
# Outputs:   "Error: <message>" followed by the full help text, both on stderr
usage_error() {
  printf 'Error: %s\n' "${1:-}" >&2
  print_help >&2
  exit 1
}
|
|
|
# Settings, each overridable through the environment.
MODEL="${WHISPER_MODEL:-base}"
# Deliberately NOT stored in LANG: that name is the locale variable, and
# overwriting it changes the locale seen by this shell and by every child
# process it starts.
TRANSCRIBE_LANG="${WHISPER_LANG:-en}"
RESULT_FILE="/tmp/voice_result_${$}.txt"
LOG_FILE="${VOICE_LOG_FILE:-/tmp/voice.log}"
EMACS_VERBOSE="${VOICE_EMACS_VERBOSE:-0}"

# Parse args
DURATION=""
TARGET=""
CLIPBOARD=false
LOOP=false
unknown_args=()

# No arguments: show brief usage, then carry on with the defaults
if [ $# -eq 0 ]; then
  echo "voice - Voice-to-text using Whisper. Use --help for options." >&2
  echo "" >&2
fi

# Arguments are recognised by shape, so they may appear in any order.
for arg in "$@"; do
  case "$arg" in
    -h | --help) show_help ;;
    [0-9]*) DURATION="$arg" ;;
    -c | --clipboard) CLIPBOARD=true ;;
    -l | --loop) LOOP=true ;;
    claude | codex | gemini | aider | gpt | emacs | org | log | stdout) TARGET="$arg" ;;
    *) unknown_args+=("$arg") ;;
  esac
done

# Report every unrecognised argument at once rather than only the first.
if [ ${#unknown_args[@]} -gt 0 ]; then
  usage_error "Unknown argument(s): ${unknown_args[*]}"
fi

# Evaluate the Emacs Lisp expression $1 through emacsclient.
# What emacsclient prints on stdout is discarded unless VOICE_EMACS_VERBOSE=1.
run_emacsclient() {
  if [ "$EMACS_VERBOSE" = "1" ]; then
    emacsclient --eval "$1"
  else
    emacsclient --eval "$1" >/dev/null
  fi
}

# Transcribe a recording with whisper.
# Globals:   MODEL, TRANSCRIBE_LANG, RESULT_FILE (read)
# Arguments: $1 - path of the recorded wav file (deleted before returning,
#                 unless it was missing or empty)
# Outputs:   the transcription as one line on stdout, and the same line in
#            RESULT_FILE so a caller can recover it if stdout capture fails;
#            progress and whisper errors go to stderr
# Returns:   0 when whisper produced a transcript file, 1 otherwise
transcribe() {
  local tmpwav="$1"
  local resultfile="$RESULT_FILE"
  local stem tmptxt whisper_log result

  # The transcript is expected in /tmp under the recording's base name with a
  # .txt extension (for /tmp/voice_<pid>.wav that is /tmp/voice_<pid>.txt).
  stem="${tmpwav##*/}"
  stem="${stem%.*}"
  tmptxt="/tmp/${stem}.txt"
  rm -f "$tmptxt" "$resultfile"

  # Check if we got any audio
  if [ ! -f "$tmpwav" ] || [ ! -s "$tmpwav" ]; then
    return 1
  fi

  echo "Transcribing..." >&2
  # whisper's own output is noisy, so it is captured and only shown on failure.
  if ! whisper_log=$(whisper "$tmpwav" --model "$MODEL" \
    --language "$TRANSCRIBE_LANG" --output_format txt --output_dir /tmp 2>&1); then
    echo "whisper exited with an error:" >&2
    printf '%s\n' "$whisper_log" | tail -n 5 >&2
  fi

  if [ ! -f "$tmptxt" ]; then
    rm -f "$tmpwav"
    return 1
  fi

  # Drop lines starting with "[", join the rest into one line, squeeze runs of
  # spaces and trim both ends.
  result=$(grep -v '^\[' "$tmptxt" | tr '\n' ' ' | tr -s ' ' | sed 's/^ //; s/ $//')
  rm -f "$tmpwav" "$tmptxt"
  printf '%s\n' "$result" > "$resultfile" # Save for retrieval
  printf '%s\n' "$result"                 # For piping (may be lost after signal)
  return 0
}
|
|
|
# Record from the microphone with `rec`, then hand the file to transcribe.
# Recording ends when `rec` exits: after DURATION seconds if set, or when the
# user presses Ctrl+G (remapped to SIGQUIT while recording).
# Globals:   DURATION (read)
# Outputs:   whatever transcribe prints on stdout; status messages on stderr
# Returns:   the exit status of transcribe
# Note: installs and then clears QUIT and EXIT traps in the shell it runs in,
# so it is meant to be called inside a subshell such as $(...).
record_and_transcribe() {
  local tmpwav="/tmp/voice_$$.wav"
  local old_stty=""
  local stty_enabled=false
  local rec_pid abort_cleanup
  local -a rec_args

  rm -f "$tmpwav"

  # Remap Ctrl+G to SIGQUIT for stopping (avoids bash SIGINT special handling)
  if [ -t 0 ]; then
    old_stty=$(stty -g 2>/dev/null)
    stty quit '^G'
    stty_enabled=true
  fi

  echo "" >&2
  echo "🎤 Recording... (Ctrl+G to stop)" >&2

  # Run rec in background
  rec_args=(-r 16000 -c 1 "$tmpwav")
  if [ -n "$DURATION" ]; then
    rec_args+=(trim 0 "$DURATION")
  fi
  rec "${rec_args[@]}" 2>/dev/null &
  rec_pid=$!

  # If this shell is terminated while recording (e.g. Ctrl+C), stop rec, drop
  # the partial file and put the terminal back. Values are expanded into the
  # trap text now so it does not depend on this function's locals later.
  abort_cleanup="kill $rec_pid 2>/dev/null; rm -f '$tmpwav'"
  if $stty_enabled && [ -n "$old_stty" ]; then
    abort_cleanup="$abort_cleanup; stty '$old_stty' 2>/dev/null"
  fi
  trap "$abort_cleanup" EXIT

  # Trap SIGQUIT to stop recording gracefully
  trap 'kill -INT $rec_pid 2>/dev/null' QUIT
  # `wait` returns as soon as a trapped signal arrives, so the QUIT trap runs
  # without delay; loop until rec has really exited.
  while kill -0 "$rec_pid" 2>/dev/null; do
    wait "$rec_pid" 2>/dev/null
  done

  # Restore the terminal before dropping the traps, so Ctrl+G is an ordinary
  # key again by the time SIGQUIT is no longer handled.
  if $stty_enabled && [ -n "$old_stty" ]; then
    stty "$old_stty" 2>/dev/null
  fi
  trap - QUIT EXIT
  echo "" >&2

  transcribe "$tmpwav"
}
|
|
|
# Escape $1 for use inside a double-quoted Emacs Lisp string literal:
# backslashes first, then double quotes.
_voice_elisp_escape() {
  printf '%s' "$1" | sed 's/\\/\\\\/g; s/"/\\"/g'
}

# Deliver a transcription to the destination named by TARGET.
# Globals:   TARGET, LOG_FILE (read)
# Arguments: $1 - transcribed text
# Outputs:   ">>> <text>" on stderr, plus whatever the target prints
# Returns:   the target command's status; 1 for an unrecognised TARGET
send_to_target() {
  local text="$1"
  local claude_bin

  printf '>>> %s\n' "$text" >&2
  case "$TARGET" in
    claude)
      # Prefer the per-user install; fall back to a `claude` found on PATH.
      claude_bin="$HOME/.claude/local/claude"
      [ -x "$claude_bin" ] || claude_bin="claude"
      "$claude_bin" -p "$text" --print
      ;;
    codex) codex "$text" ;;
    gemini) gemini "$text" ;;
    aider) aider --message "$text" ;;
    gpt) gpt "$text" ;;
    emacs)
      run_emacsclient "(insert \"$(_voice_elisp_escape "$text")\")"
      ;;
    org)
      # Uses the org-capture template bound to key "v".
      run_emacsclient "(progn (org-capture nil \"v\") (insert \"$(_voice_elisp_escape "$text")\") (org-capture-finalize))"
      ;;
    # printf rather than echo: text such as "-n" must come out verbatim.
    log) printf '%s\n' "$text" >> "$LOG_FILE" ;;
    stdout) printf '%s\n' "$text" ;;
    *)
      printf 'Unknown target: %s\n' "$TARGET" >&2
      return 1
      ;;
  esac
}
|
|
|
# Copy stdin to the system clipboard with the first tool that is available.
# Returns: the tool's exit status, or 127 when no clipboard tool is found.
_voice_copy_to_clipboard() {
  if command -v pbcopy >/dev/null 2>&1; then
    pbcopy
  elif command -v wl-copy >/dev/null 2>&1; then
    wl-copy
  elif command -v xclip >/dev/null 2>&1; then
    xclip -selection clipboard
  elif command -v xsel >/dev/null 2>&1; then
    xsel --clipboard --input
  else
    return 127
  fi
}

# Route a finished transcription to where the user asked for it.
# Globals:   CLIPBOARD, TARGET (read)
# Arguments: $1 - transcribed text; an empty string is silently ignored
# Outputs:   clipboard mode wins over TARGET; with neither set the text is
#            printed on stdout
# Returns:   0 on success or empty input, non-zero when delivery failed
handle_text() {
  local TEXT="$1"

  if [ -z "$TEXT" ]; then
    return 0
  fi

  if $CLIPBOARD; then
    # printf rather than echo: text such as "-n" must be copied verbatim.
    if printf '%s\n' "$TEXT" | _voice_copy_to_clipboard; then
      printf 'Copied: %s\n' "$TEXT" >&2
    else
      echo "Clipboard copy failed (is pbcopy, wl-copy, xclip or xsel installed?)" >&2
      return 1
    fi
  elif [ -n "$TARGET" ]; then
    send_to_target "$TEXT"
  else
    printf '%s\n' "$TEXT"
  fi
}
|
|
|
# Single-shot mode: record once, transcribe, deliver the text, then exit.
# Globals: RESULT_FILE (read; the file is always removed)
# Exits:   1 when no transcription was obtained, otherwise with the status of
#          handle_text
run_single_shot() {
  local text

  # Trap SIGQUIT in parent to prevent exit during subshell capture
  trap '' QUIT
  text=$(record_and_transcribe)
  trap - QUIT

  # If stdout capture failed (signal case), read from result file
  if [ -z "$text" ] && [ -f "$RESULT_FILE" ]; then
    text=$(cat "$RESULT_FILE")
  fi
  # Remove the file on every path: it holds the transcript and would
  # otherwise stay behind in /tmp whenever stdout capture worked.
  rm -f "$RESULT_FILE"

  if [ -z "$text" ]; then
    echo "Transcription failed" >&2
    exit 1
  fi

  handle_text "$text"
  exit $?
}

if ! $LOOP; then
  run_single_shot
fi
|
|
|
# Loop mode with Ctrl+G toggle
TMPWAV="/tmp/voice_$$.wav"
REC_PID=""      # PID of the running rec process, empty when idle
RECORDING=false # true while a recording is in progress
OLD_STTY=""     # saved terminal settings, empty until they have been read

# Undo everything loop mode set up: stop a running recorder, delete the
# temporary files and restore the terminal. Registered on EXIT below, so it
# runs on every exit path of loop mode.
cleanup() {
  if [ -n "$REC_PID" ]; then
    kill "$REC_PID" 2>/dev/null
  fi
  rm -f "$TMPWAV" "/tmp/voice_$$.txt" "$RESULT_FILE"
  # Restore terminal settings
  if [ -n "$OLD_STTY" ]; then
    stty "$OLD_STTY" 2>/dev/null
  fi
  return 0
}
trap cleanup EXIT
|
|
|
# Loop mode is driven by signals generated from the terminal, so refuse to
# start when stdin is not a TTY.
if ! [ -t 0 ]; then
  echo "Loop mode requires a TTY." >&2
  exit 1
fi

# Save terminal settings and remap Ctrl+G to SIGQUIT.
# Without saved settings the remap could not be undone on exit, so a failing
# `stty -g` stops the script before the terminal is touched.
if ! OLD_STTY=$(stty -g); then
  OLD_STTY=""
  echo "Could not read terminal settings." >&2
  exit 1
fi
stty quit '^G'

echo "Voice loop mode. Ctrl+G to start/stop recording, Ctrl+C to exit." >&2
echo "" >&2
echo "⏸ Ready. Press Ctrl+G to start recording..." >&2
|
|
|
# SIGQUIT (Ctrl+G) handler for loop mode: starts a recording when idle, or
# stops the running one, transcribes it and delivers the text.
# Globals: RECORDING, REC_PID (read and written); TMPWAV, RESULT_FILE (read)
# Outputs: status messages on stderr; whatever handle_text prints
toggle_recording() {
  local text

  if $RECORDING; then
    # Stop recording
    RECORDING=false
    if [ -n "$REC_PID" ]; then
      kill "$REC_PID" 2>/dev/null
      wait "$REC_PID" 2>/dev/null # block until rec has exited
      REC_PID=""
    fi

    # Transcribe and handle
    text=$(transcribe "$TMPWAV")
    # If stdout capture failed, fall back to the copy transcribe leaves in
    # RESULT_FILE.
    if [ -z "$text" ] && [ -f "$RESULT_FILE" ]; then
      text=$(cat "$RESULT_FILE")
    fi
    rm -f "$RESULT_FILE"

    if [ -z "$text" ]; then
      echo "Transcription failed" >&2
    else
      handle_text "$text"
    fi
    echo "" >&2
    echo "⏸ Ready. Press Ctrl+G to start recording..." >&2
  else
    # Start recording
    RECORDING=true
    rm -f "$TMPWAV"
    echo "" >&2
    echo "🎤 Recording... (Ctrl+G to stop)" >&2
    rec -r 16000 -c 1 "$TMPWAV" 2>/dev/null &
    REC_PID=$!
  fi
}
|
|
|
# Ctrl+G (remapped to SIGQUIT) toggles recording; Ctrl+C says goodbye and exits.
# Both farewell lines go to stderr so nothing stray lands on stdout.
trap toggle_recording QUIT
trap 'echo "" >&2; echo "Goodbye!" >&2; exit 0' INT

# Wait forever, signals do the work
while true; do
  sleep 1
done