Skip to content

Instantly share code, notes, and snippets.

@simonespa
Created March 6, 2026 17:13
Show Gist options
  • Select an option

  • Save simonespa/38f85b0fdf73d902394668030265b122 to your computer and use it in GitHub Desktop.

Select an option

Save simonespa/38f85b0fdf73d902394668030265b122 to your computer and use it in GitHub Desktop.
Transformers Speech Recognition Script
import sys
import numpy as np
from transformers import pipeline
from transformers.pipelines.audio_utils import ffmpeg_microphone_live
from curses import wrapper
import curses
# Length (seconds) of each audio chunk handed to the model, and how often
# (seconds) the microphone stream yields a refreshed partial chunk.
chunk_length_s = 5
stream_chunk_s = 0.1


def fit_to_window(text, rows, cols):
    """Return the tail of *text* that fits in a ``rows`` x ``cols`` window.

    curses raises ``curses.error`` when a write runs past the bottom-right
    cell of a window, so one cell is always left free. Only the most recent
    part of the transcript is kept, since that is what the speaker wants to
    see. Returns an empty string when the window has no usable cell.

    NOTE: assumes one screen cell per character; double-width characters
    (e.g. CJK) may still overflow -- the caller tolerates that case.
    """
    capacity = rows * cols - 1
    if capacity <= 0:
        return ""
    return text[-capacity:]


def merge_transcript(committed, item):
    """Combine the committed transcript with one pipeline result.

    Args:
        committed: Text finalized by earlier, non-partial results.
        item: One result yielded by the pipeline; its ``"text"`` and
            ``"partial"`` keys are read.

    Returns:
        A ``(displayed, committed)`` tuple: the text to show right now and
        the (possibly extended) committed text to carry to the next result.
    """
    displayed = committed + item["text"]
    # "partial" is indexed with [0] exactly as the original script did --
    # presumably the flag arrives wrapped in a batch-sized sequence; confirm
    # against the installed transformers version.
    if not item["partial"][0]:
        committed = displayed
    return displayed, committed


def _render(stdscr, text):
    """Draw *text* from the top-left corner and clear everything after it."""
    rows, cols = stdscr.getmaxyx()
    try:
        stdscr.addstr(0, 0, fit_to_window(text, rows, cols))
        # clrtobot (not clrtoeol): a revised partial result can be shorter
        # than the previous one and would otherwise leave stale wrapped lines.
        stdscr.clrtobot()
    except curses.error:
        # Best effort: text that still overflows (wide characters, a window
        # resized mid-write) must not kill a live transcription session.
        pass
    stdscr.refresh()


def transcribe_loop(stdscr, pipe, mic):
    """Stream *mic* through *pipe* and keep the transcript on screen.

    Intended to be run through ``curses.wrapper``, which supplies *stdscr*
    and restores the terminal when this function returns or raises.
    """
    # Shown inside the curses window: a plain print() issued before curses
    # takes over the screen is typically wiped before the user can read it.
    _render(stdscr, "Start talking...")
    text = ""
    for item in pipe(mic):
        displayed, text = merge_transcript(text, item)
        _render(stdscr, displayed)


def main():
    """Load the Whisper model, open the microphone and transcribe live."""
    # Build the model before entering curses so that any download/progress
    # output it produces does not garble the curses screen.
    # device=0 is kept from the original script.
    pipe = pipeline(
        "automatic-speech-recognition", model="openai/whisper-base", device=0
    )
    # Capture audio at the sampling rate reported by the model's feature
    # extractor.
    sampling_rate = pipe.feature_extractor.sampling_rate
    # NOTE: the original left `stride_length_s=(1, 0.1)` disabled here.
    mic = ffmpeg_microphone_live(
        sampling_rate=sampling_rate,
        chunk_length_s=chunk_length_s,
        stream_chunk_s=stream_chunk_s,
    )
    try:
        # wrapper() sets noecho/cbreak and always restores the terminal,
        # including on exceptions -- the original never called endwin().
        wrapper(transcribe_loop, pipe, mic)
    except KeyboardInterrupt:
        # Ctrl-C is the normal way to stop; exit without a traceback.
        pass


if __name__ == "__main__":
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment