Skip to content

Instantly share code, notes, and snippets.

@josmithiii
Last active January 20, 2026 17:45
Show Gist options
  • Select an option

  • Save josmithiii/607f6090dd9599763d6cc109280abe63 to your computer and use it in GitHub Desktop.

Select an option

Save josmithiii/607f6090dd9599763d6cc109280abe63 to your computer and use it in GitHub Desktop.
Voice-to-text using Whisper with AI assistant and Emacs integrations

voice

Voice-to-text transcription using OpenAI Whisper, with integrations for AI coding assistants and Emacs.

Requirements

  • openai-whisper: pip install openai-whisper
  • sox: brew install sox (for the rec command)
  • ffmpeg: brew install ffmpeg

Installation

curl -o ~/.local/bin/voice https://gist.githubusercontent.com/josmithiii/607f6090dd9599763d6cc109280abe63/raw/voice
chmod +x ~/.local/bin/voice

Usage

voice                 # Record and print transcription
voice -c              # Record and copy to clipboard
voice 10              # Record for 10 seconds
voice claude          # Record and send to Claude Code
voice -l -c           # Loop mode with clipboard (Ctrl+G toggles, Ctrl+C exits)
voice -l emacs        # Loop mode inserting into Emacs at point
voice -l org          # Loop mode capturing to org-mode

Targets

Target Description
claude Send to Claude Code
codex Send to Codex
gemini Send to Gemini
aider Send to Aider
gpt Send to GPT
emacs Insert at point in Emacs (requires a running Emacs server)
org Capture as org-mode voice note

Environment Variables

  • WHISPER_MODEL: Whisper model to use (default: base). Options: tiny, base, small, medium, large
  • WHISPER_LANG: Language code (default: en)

Loop Mode Controls

  • Ctrl+G: Toggle recording on/off
  • Ctrl+C: Exit

Emacs Integration

Add to your init file:

(setq server-socket-dir "~/.emacs.d/server")
(unless (display-graphic-p)
  (server-start))

For org-capture support, add:

(setq org-capture-templates
      '(("v" "Voice note" entry (file+headline "~/org/voice.org" "Voice Notes")
         "* %U\n%i%?\n" :empty-lines 1)))
#!/bin/bash
# Voice-to-text using Whisper, with integrations for AI assistants and Emacs
# Write the complete help text to stdout.
# The heredoc delimiter is quoted, so the text is emitted literally with no
# parameter or command expansion.
print_help() {
  cat <<'HELP_TEXT'
voice - Voice-to-text using Whisper
USAGE:
voice [OPTIONS] [DURATION] [TARGET]
OPTIONS:
-h, --help Show this help message
-c, --clipboard Copy transcription to clipboard
-l, --loop Continuous mode (Ctrl+G toggles recording, Ctrl+C exits)
CONTROLS:
Single-shot: Ctrl+G stop and transcribe, Ctrl+C abort
Loop mode: Ctrl+G toggle recording, Ctrl+C exit
Note: loop mode requires a TTY (interactive terminal)
DURATION:
Number of seconds to record (optional, default: until Ctrl+G)
TARGETS:
claude Send to Claude Code
codex Send to Codex
gemini Send to Gemini
aider Send to Aider
gpt Send to GPT
emacs Insert at point in Emacs (requires server)
org Capture as org-mode voice note
log Append to log file (see VOICE_LOG_FILE)
stdout Force stdout output (same as default)
EXAMPLES:
voice Record and print transcription
voice -c Record and copy to clipboard
voice 10 Record 10 seconds, print transcription
voice claude Record and send to Claude Code
voice -l -c Loop mode: Ctrl+G to record, copies to clipboard
voice -l emacs Loop mode: insert each recording into Emacs
ENVIRONMENT:
WHISPER_MODEL Whisper model (default: base)
WHISPER_LANG Language code (default: en)
VOICE_LOG_FILE Log file path (default: /tmp/voice.log)
VOICE_EMACS_VERBOSE Show emacsclient output (set to 1)
HELP_TEXT
}
# Print the help text on stdout and end the script with status 0.
show_help() { print_help; exit 0; }
# Report a usage problem and abort.
# Arguments: $1 - description of what was wrong
# Outputs:   "Error: <description>" followed by the full help text, on stderr
# Exits the script with status 1.
usage_error() {
  {
    echo "Error: $1"
    print_help
  } >&2
  exit 1
}
# ---- Configuration (each value can be overridden through the environment) ----
MODEL="${WHISPER_MODEL:-base}"
# The Whisper language code is kept in LANG_CODE, not LANG: LANG is the POSIX
# locale variable, and overwriting it with a bare code such as "en" changes the
# locale of this shell and, when LANG is exported, of every child process.
LANG_CODE="${WHISPER_LANG:-en}"
RESULT_FILE="/tmp/voice_result_${$}.txt"
LOG_FILE="${VOICE_LOG_FILE:-/tmp/voice.log}"
EMACS_VERBOSE="${VOICE_EMACS_VERBOSE:-0}"

# ---- Argument parsing ----
DURATION=""
TARGET=""
CLIPBOARD=false
LOOP=false
unknown_args=()
# No arguments: show brief usage (recording still proceeds afterwards)
if [ $# -eq 0 ]; then
  echo "voice - Voice-to-text using Whisper. Use --help for options." >&2
  echo "" >&2
fi
for arg in "$@"; do
  case "$arg" in
    -h|--help) show_help ;;
    [0-9]*) DURATION="$arg" ;;
    -c|--clipboard) CLIPBOARD=true ;;
    -l|--loop) LOOP=true ;;
    claude|codex|gemini|aider|gpt|emacs|org|log|stdout) TARGET="$arg" ;;
    *) unknown_args+=("$arg") ;;
  esac
done
# Report every unrecognised argument in a single error
if [ ${#unknown_args[@]} -gt 0 ]; then
  usage_error "Unknown argument(s): ${unknown_args[*]}"
fi

# Evaluate an Emacs Lisp expression through emacsclient.
# Globals:   EMACS_VERBOSE (read) - "1" keeps emacsclient's stdout visible
# Arguments: $1 - the expression passed to `emacsclient --eval`
run_emacsclient() {
  if [ "$EMACS_VERBOSE" = "1" ]; then
    emacsclient --eval "$1"
  else
    emacsclient --eval "$1" >/dev/null
  fi
}

# Transcribe a recorded WAV file with Whisper.
# Globals:   MODEL, LANG_CODE, RESULT_FILE (read)
# Arguments: $1 - path of the WAV file; it is deleted once it has been handled
# Outputs:   the transcript as one whitespace-normalised line on stdout; the
#            same line is also written to RESULT_FILE so the caller can
#            recover it when the stdout capture is lost
# Returns:   0 on success; 1 when there is no audio, whisper is not installed,
#            or whisper produced no transcript file
transcribe() {
  local tmpwav="$1"
  # whisper writes <output_dir>/<input basename>.txt; callers record to
  # /tmp/voice_$$.wav, so this is where the transcript appears
  local tmptxt="/tmp/voice_$$.txt"
  local resultfile="$RESULT_FILE"
  rm -f "$tmptxt" "$resultfile"
  # Check if we got any audio
  if [ ! -f "$tmpwav" ] || [ ! -s "$tmpwav" ]; then
    return 1
  fi
  # whisper's own output is discarded below, so report a missing binary here
  if ! command -v whisper >/dev/null 2>&1; then
    echo "Error: whisper not found (pip install openai-whisper)" >&2
    rm -f "$tmpwav"
    return 1
  fi
  echo "Transcribing..." >&2
  whisper "$tmpwav" --model "$MODEL" --language "$LANG_CODE" --output_format txt --output_dir /tmp >/dev/null 2>&1
  if [ ! -f "$tmptxt" ]; then
    rm -f "$tmpwav"
    return 1
  fi
  local result
  # Drop lines starting with "[", join the rest into one line, collapse runs
  # of spaces and trim both ends. The pattern is " +" (one or more spaces):
  # " *" also matches the empty string and would put a space between every
  # character.
  result=$(grep -v '^\[' "$tmptxt" | tr '\n' ' ' | sed -E 's/ +/ /g; s/^ //; s/ $//')
  rm -f "$tmpwav" "$tmptxt"
  printf '%s\n' "$result" > "$resultfile" # Save for retrieval
  printf '%s\n' "$result" # For piping (may be lost after signal)
  return 0
}
# Record audio with `rec` into /tmp/voice_$$.wav, wait until the recorder has
# exited, then pass the file to transcribe.
# Globals:   DURATION (read) - when non-empty, handed to rec as the trim length
# Outputs:   status messages on stderr; stdout is whatever transcribe prints
# Returns:   the exit status of transcribe
# NOTE(review): the terminal settings are restored only on the normal path at
# the end of this function; if the shell is killed while recording (e.g. by
# Ctrl+C) the quit key presumably stays mapped to ^G - confirm whether an
# EXIT/INT trap is wanted here.
record_and_transcribe() {
local tmpwav="/tmp/voice_$$.wav"
rm -f "$tmpwav"
# Remap Ctrl+G to SIGQUIT for stopping (avoids bash SIGINT special handling)
local old_stty=""
local stty_enabled=false
# Terminal settings are only touched when stdin is a terminal
if [ -t 0 ]; then
# Remember the current settings so they can be put back after recording
old_stty=$(stty -g 2>/dev/null)
stty quit '^G'
stty_enabled=true
fi
echo "" >&2
echo "🎤 Recording... (Ctrl+G to stop)" >&2
# Run rec in background
if [ -n "$DURATION" ]; then
# With a duration, rec is given "trim 0 DURATION" so it stops by itself
rec -r 16000 -c 1 "$tmpwav" trim 0 "$DURATION" 2>/dev/null &
else
rec -r 16000 -c 1 "$tmpwav" 2>/dev/null &
fi
local rec_pid=$!
# Trap SIGQUIT to stop recording gracefully
trap 'kill -INT $rec_pid 2>/dev/null' QUIT
# Poll until rec has exited; kill -0 only tests whether the PID still exists.
# The "|| sleep 1" fallback is presumably for sleep implementations that
# reject fractional seconds.
while kill -0 $rec_pid 2>/dev/null; do
sleep 0.1 2>/dev/null || sleep 1
done
# Recording is over: drop the QUIT handler installed above
trap - QUIT
if $stty_enabled && [ -n "$old_stty" ]; then
stty "$old_stty" 2>/dev/null
fi
echo "" >&2
transcribe "$tmpwav"
}
# Deliver a transcription to the destination named by the global TARGET.
# Globals:   TARGET, LOG_FILE (read)
# Arguments: $1 - the text to deliver
# Outputs:   ">>> <text>" on stderr, then whatever the selected command prints
# Returns:   the status of the selected command (0 when TARGET matches nothing)
send_to_target() {
  local message="$1"
  local quoted
  echo ">>> $message" >&2
  if [ "$TARGET" = "claude" ]; then
    ~/.claude/local/claude -p "$message" --print
  elif [ "$TARGET" = "codex" ]; then
    codex "$message"
  elif [ "$TARGET" = "gemini" ]; then
    gemini "$message"
  elif [ "$TARGET" = "aider" ]; then
    aider --message "$message"
  elif [ "$TARGET" = "gpt" ]; then
    gpt "$message"
  elif [ "$TARGET" = "emacs" ]; then
    # Backslashes and double quotes are escaped so the text can sit inside
    # an Emacs Lisp string literal
    quoted=$(printf '%s' "$message" | sed 's/\\/\\\\/g; s/"/\\"/g')
    run_emacsclient "(insert \"${quoted}\")"
  elif [ "$TARGET" = "org" ]; then
    quoted=$(printf '%s' "$message" | sed 's/\\/\\\\/g; s/"/\\"/g')
    run_emacsclient "(progn (org-capture nil \"v\") (insert \"${quoted}\") (org-capture-finalize))"
  elif [ "$TARGET" = "log" ]; then
    echo "$message" >> "$LOG_FILE"
  elif [ "$TARGET" = "stdout" ]; then
    echo "$message"
  fi
}
# Route one transcription: the clipboard when CLIPBOARD is true, otherwise
# the selected TARGET, otherwise plain stdout. Empty text is ignored.
# Globals:   CLIPBOARD, TARGET (read)
# Arguments: $1 - the transcribed text
handle_text() {
  local transcript="$1"
  # Nothing was transcribed: do nothing and report success
  [ -n "$transcript" ] || return 0
  if ! $CLIPBOARD; then
    if [ -n "$TARGET" ]; then
      send_to_target "$transcript"
    else
      echo "$transcript"
    fi
    return
  fi
  # Clipboard mode takes precedence over any TARGET
  echo "$transcript" | pbcopy
  echo "Copied: $transcript" >&2
}
# Single-shot mode: record once, transcribe, deliver the text, exit.
if ! $LOOP; then
  # Trap SIGQUIT in parent to prevent exit during subshell capture
  trap '' QUIT
  TEXT=$(record_and_transcribe)
  trap - QUIT
  # If stdout capture failed (signal case), read from result file
  if [ -z "$TEXT" ] && [ -f "$RESULT_FILE" ]; then
    TEXT=$(cat "$RESULT_FILE")
  fi
  # transcribe writes RESULT_FILE on every successful run, not only when the
  # stdout capture is lost, so remove it unconditionally; otherwise each
  # normal invocation leaves a /tmp/voice_result_<pid>.txt behind.
  rm -f "$RESULT_FILE"
  if [ -z "$TEXT" ]; then
    echo "Transcription failed" >&2
    exit 1
  fi
  handle_text "$TEXT"
  exit 0
fi
# Loop mode with Ctrl+G toggle: state shared by cleanup and toggle_recording
TMPWAV="/tmp/voice_$$.wav" # file each recording is written to
REC_PID=                   # PID of the running rec process; empty when idle
RECORDING=false            # true while a recording is in progress
OLD_STTY=                  # terminal settings saved before remapping Ctrl+G
# Loop-mode teardown: stop a running recorder, delete the temporary files and
# put the saved terminal settings back.
# Globals: REC_PID, TMPWAV, RESULT_FILE, OLD_STTY (read)
cleanup() {
  if [ -n "$REC_PID" ]; then
    kill "$REC_PID" 2>/dev/null
  fi
  rm -f "$TMPWAV" "/tmp/voice_$$.txt" "$RESULT_FILE"
  # Restore terminal settings (only if they were saved)
  [ -n "$OLD_STTY" ] && stty "$OLD_STTY" 2>/dev/null
}
# From here on every exit path runs cleanup
trap cleanup EXIT
# Save terminal settings and remap Ctrl+G to SIGQUIT
if [ ! -t 0 ]; then
  echo "Loop mode requires a TTY." >&2
  exit 1
fi
OLD_STTY=$(stty -g)
stty quit '^G'
{
  echo "Voice loop mode. Ctrl+G to start/stop recording, Ctrl+C to exit."
  echo ""
  echo "⏸ Ready. Press Ctrl+G to start recording..."
} >&2
# SIGQUIT (Ctrl+G) handler for loop mode: starts a recording when idle;
# otherwise stops the recorder, transcribes the audio and delivers the text.
# Globals: RECORDING, REC_PID, TEXT (written); TMPWAV (read)
toggle_recording() {
  if ! $RECORDING; then
    # Idle -> start recording
    RECORDING=true
    rm -f "$TMPWAV"
    echo "" >&2
    echo "🎤 Recording... (Ctrl+G to stop)" >&2
    rec -r 16000 -c 1 "$TMPWAV" 2>/dev/null &
    REC_PID=$!
    return
  fi
  # Recording -> stop the recorder and wait for it to exit
  RECORDING=false
  if [ -n "$REC_PID" ]; then
    kill "$REC_PID" 2>/dev/null
    wait "$REC_PID" 2>/dev/null
    REC_PID=""
  fi
  # Transcribe and handle
  TEXT=$(transcribe "$TMPWAV")
  handle_text "$TEXT"
  echo "" >&2
  echo "⏸ Ready. Press Ctrl+G to start recording..." >&2
}
# Ctrl+G was remapped to SIGQUIT above; each press toggles recording
trap toggle_recording QUIT
# Ctrl+C: say goodbye and exit 0. Both lines go to stderr, like every other
# status message, so that stdout carries only transcriptions (the blank line
# previously went to stdout).
trap 'echo "" >&2; echo "Goodbye!" >&2; exit 0' INT
# Wait forever, signals do the work
while true; do
  sleep 1
done
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment