Skip to content

Instantly share code, notes, and snippets.

@InTEGr8or
Last active January 24, 2026 23:46
Show Gist options
  • Select an option

  • Save InTEGr8or/72930d234a09e38e87b00cd6056adac3 to your computer and use it in GitHub Desktop.

Select an option

Save InTEGr8or/72930d234a09e38e87b00cd6056adac3 to your computer and use it in GitHub Desktop.
Global Windows AI TTS Reader (AppsKey + uv + edge-tts)

Global Windows AI TTS Reader

Select any text in Windows and have it read aloud by a modern AI voice. This setup uses AutoHotkey for shortcuts and uv/Python for high-quality, low-latency streaming TTS.

🚀 Key Features

  • Modern AI Voices: Uses Microsoft Edge's Neural TTS (en-US-AndrewNeural).
  • Zero-Latency Streaming: Audio starts playing as it streams, no waiting for full downloads.
  • Local Caching: Repeatedly read text plays instantly from the local audio_cache.
  • Atomic Dependencies: Uses uv script metadata to handle all Python requirements automatically.
  • Self-Limiting Logging: debug.log is automatically cleared and restarted once it exceeds 1 MB, so it never bloats.

⌨️ Shortcuts

  • AppsKey (Menu Key): Read selected text aloud.
  • Ctrl + AppsKey: Force Read. Useful in Neovim/terminals: sends 'y' (yank the selection) followed by Ctrl+C before reading.
  • Shift + AppsKey: Stop reading immediately (kills the player process).

🛠️ Requirements

  1. AutoHotkey v2: scoop install autohotkey
  2. Python 3.12+: scoop install python
  3. uv: scoop install uv
  4. ffmpeg: scoop install ffmpeg (Required for ffplay)

📦 Setup Instructions

  1. Save tts_reader_edge.py and GlobalTTS.ahk to your preferred script folder.
  2. Update the uvPath and scriptPath in GlobalTTS.ahk to match your local file system.
  3. Add GlobalTTS.ahk to your Windows Startup folder (shell:startup) for global availability.

🎙️ Voice Customization

To change the voice, edit tts_reader_edge.py and change the VOICE variable. Popular options:

  • en-US-AndrewNeural (Male, Very Natural)
  • en-US-AvaNeural (Female, Very Natural)
  • en-GB-SoniaNeural (Female, British)
#Requires AutoHotkey v2.0
; Global TTS Reader
; Uses explicit hotkey definitions to ensure native functions are suppressed.
; 1. Standard Read
; AppsKey alone: read the current selection aloud ($ = always use the keyboard hook).
$AppsKey::HandleTTS(false)
; 2. Force Read (Ctrl + AppsKey)
; Ctrl+AppsKey: "force" mode, which sends 'y' before Ctrl+C.
$^AppsKey::HandleTTS(true)
; 3. Stop (Shift + AppsKey)
; Shift+AppsKey: stop playback.
$+AppsKey::StopTTS()
; Copy the current selection, write it to a temp file and launch the Python
; TTS reader on that file.
;   force - when true, send 'y' before Ctrl+C (for Neovim/terminal selections);
;           when false, send a plain Ctrl+C.
; The previous clipboard contents are restored before returning, and the
; function always waits for AppsKey to be released so a held key cannot
; re-trigger the hotkey.
HandleTTS(force)
{
    ; Machine-specific locations: adjust to match the local installation.
    ; Backslash is not an escape character in AutoHotkey, so the UNC path is
    ; written with single separators after the leading "\\".
    uvPath := "C:\Users\xgenx\.local\bin\uv.exe"
    scriptPath := "\\wsl$\Ubuntu\home\mstouffer\.config\windows-tts\tts_reader_edge.py"
    tempFile := A_Temp . "\tts_input.txt"

    ; Borrow the clipboard to capture the selection, then put it back.
    oldClipboard := ClipboardAll()
    A_Clipboard := ""
    if (force) {
        Send("y")
        Sleep(50)
        Send("^c")
    } else {
        SendEvent("^c")
    }
    gotText := ClipWait(1)
    selectedText := gotText ? A_Clipboard : ""
    A_Clipboard := oldClipboard

    if (gotText) {
        try {
            if FileExist(tempFile)
                FileDelete(tempFile)
            ; "UTF-8-RAW" writes UTF-8 without a byte order mark, so the
            ; reader does not receive U+FEFF as the first character.
            FileAppend(selectedText, tempFile, "UTF-8-RAW")
            ; cmd /c strips the outermost pair of quotes, which lets every
            ; path (including uv.exe) be quoted individually.
            Run(A_ComSpec . ' /c ""' . uvPath . '" run --python 3.12 "' . scriptPath . '" "' . tempFile . '""', , "Hide")
            ToolTip("Reading AI voice...")
        } catch Error as e {
            ; Surface the failure instead of silently doing nothing.
            ToolTip("TTS failed: " . e.Message)
        }
        SetTimer(() => ToolTip(), -2000)
    }

    ; Wait for key release on every path to prevent menu "leakage".
    KeyWait("AppsKey")
}
; Stop playback by force-killing every ffplay.exe process, then wait for the
; AppsKey to be released.
StopTTS()
{
    killCommand := A_ComSpec . " /c taskkill /F /IM ffplay.exe"
    try {
        Run(killCommand, , "Hide")
        ToolTip("Stopped TTS")
        ; Negative period = run once: clear the tooltip after one second.
        SetTimer(() => ToolTip(), -1000)
    }
    KeyWait("AppsKey")
}
# /// script
# dependencies = [
# "edge-tts",
# ]
# ///
import os
import sys
import asyncio
import hashlib
import subprocess
import edge_tts
from datetime import datetime
# Runtime artifacts (debug log and cached audio) are kept alongside this script.
BASE_DIR = os.path.dirname(__file__)
CACHE_DIR = os.path.join(BASE_DIR, "audio_cache")
LOG_FILE = os.path.join(BASE_DIR, "debug.log")
# Create the cache folder up front so later writes can assume it exists.
os.makedirs(CACHE_DIR, exist_ok=True)
def log(message):
    """Append a timestamped *message* line to ``LOG_FILE``.

    When the existing log is larger than 1 MB it is truncated first and a
    "Log Rotated" marker line is written before the message.

    Logging is best-effort: any ``OSError`` (missing directory, read-only
    location, locked file) is swallowed so that a logging problem can never
    interrupt playback or the caller's own error handling.
    """
    try:
        # A single getsize call replaces the exists-then-getsize pair.
        oversized = os.path.getsize(LOG_FILE) > 1024 * 1024
    except OSError:
        # No log file yet (or it cannot be inspected): just try to append.
        oversized = False

    try:
        # "w" truncates the oversized log; "a" appends to the current one.
        with open(LOG_FILE, "w" if oversized else "a", encoding="utf-8") as f:
            if oversized:
                f.write(f"--- Log Rotated at {datetime.now()} ---\n")
            f.write(f"{datetime.now()}: {message}\n")
    except OSError:
        pass
async def amain(text) -> None:
    """Speak *text* through ffplay, using the local audio cache when possible.

    On a cache hit the stored MP3 is played and the function returns. On a
    miss the audio is streamed from Edge TTS into ffplay's stdin while also
    being written to a temporary ``.part`` file; that file is promoted to a
    cache entry only after the whole stream has been received, so an
    interrupted read can never leave truncated audio in the cache.

    Errors while starting ffplay or streaming are logged via ``log`` instead
    of being raised.
    """
    VOICE = "en-US-AndrewNeural"
    # The voice is part of the cache key so that changing VOICE does not
    # replay audio that was rendered with the previous voice. MD5 is used
    # only as a file-name fingerprint, not for security.
    text_hash = hashlib.md5(
        f"{VOICE}\n{text}".encode("utf-8"), usedforsecurity=False
    ).hexdigest()
    cache_path = os.path.join(CACHE_DIR, f"{text_hash}.mp3")
    # Per-process temp name: concurrent readers never share a partial file.
    partial_path = f"{cache_path}.{os.getpid()}.part"

    # 1. Check if it's already in the cache
    if os.path.exists(cache_path):
        log(f"Cache hit: {text_hash}")
        try:
            subprocess.run(
                ["ffplay", "-nodisp", "-autoexit", cache_path],
                capture_output=True,
                text=True,
            )
        except OSError as e:
            # Typically ffplay is not installed or not on PATH.
            log(f"Could not run ffplay: {e}")
        return

    log(f"Cache miss. Streaming TTS for hash: {text_hash}")
    player = None
    try:
        communicate = edge_tts.Communicate(text, VOICE)
        # 2. Start ffplay reading from stdin so playback begins while streaming.
        # stdout/stderr go to DEVNULL so an unread pipe can never fill up
        # and block the player.
        player = subprocess.Popen(
            ["ffplay", "-nodisp", "-autoexit", "-"],
            stdin=subprocess.PIPE,
            stderr=subprocess.DEVNULL,
            stdout=subprocess.DEVNULL,
        )
        stream_complete = True
        received = 0
        with open(partial_path, "wb") as cache_file:
            async for chunk in communicate.stream():
                if chunk["type"] != "audio":
                    continue
                data = chunk["data"]
                try:
                    player.stdin.write(data)
                except OSError:
                    # BrokenPipeError, or a plain OSError (EINVAL) on
                    # Windows: the player was closed, e.g. by the user.
                    log("Broken pipe: ffplay likely closed.")
                    stream_complete = False
                    break
                cache_file.write(data)
                received += len(data)

        # Publish the cache entry only when every chunk arrived; os.replace
        # makes the complete file appear atomically.
        if stream_complete and received:
            os.replace(partial_path, cache_path)

        try:
            # Closing flushes buffered audio and signals end-of-input.
            player.stdin.close()
        except OSError:
            log("Broken pipe: ffplay likely closed.")
        log("Streaming loop finished.")
        player.wait()
    except Exception as e:
        log(f"Exception in amain: {str(e)}")
    finally:
        # Make sure the player is not left waiting on an open pipe.
        if player is not None and player.stdin and not player.stdin.closed:
            try:
                player.stdin.close()
            except OSError:
                pass
        # Remove any partial download; it is absent after a successful publish.
        try:
            os.unlink(partial_path)
        except OSError:
            pass
def _load_text(input_arg):
    """Return the text to speak for one command-line argument.

    If *input_arg* names an existing file, the file's contents are returned;
    otherwise the argument itself is treated as the text.

    Raises:
        OSError: the file exists but cannot be read.
        UnicodeError: the file is not valid UTF-8.
    """
    if not os.path.isfile(input_arg):
        return input_arg
    # "utf-8-sig" drops a leading byte order mark when one is present (the
    # AutoHotkey side writes the file with "UTF-8", which adds a BOM) and
    # reads BOM-less UTF-8 unchanged.
    with open(input_arg, "r", encoding="utf-8-sig") as f:
        return f.read()


# Script entry point: argv[1] is either the text itself or a path to a text file.
if __name__ == "__main__":
    if len(sys.argv) < 2:
        print(
            "usage: tts_reader_edge.py <text | path-to-text-file>",
            file=sys.stderr,
        )
        sys.exit(1)
    input_arg = sys.argv[1]
    try:
        text_to_read = _load_text(input_arg)
    except (OSError, UnicodeError) as e:
        log(f"Error reading file {input_arg}: {e}")
        sys.exit(1)
    # Nothing to say for empty or whitespace-only input.
    if text_to_read.strip():
        asyncio.run(amain(text_to_read))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment