video2srt_fw.sh
#!/usr/bin/env bash
set -euo pipefail

# video2srt_fw.sh
# Converts a video to .srt via ffmpeg + faster-whisper (CUDA),
# with a --stream option to print subtitles as they are generated.

die() { echo "ERR: $*" >&2; exit 1; }
need() { command -v "$1" >/dev/null 2>&1 || die "Missing command: $1"; }

usage() {
  cat <<'EOF'
Usage:
  ./video2srt_fw.sh [--stream] <video_file>

Env options:
  MODEL=small|medium|large-v3
  LANG=fr|en|auto
  DEVICE=cuda|cpu
  COMPUTE=float16|int8_float16|int8
  BEAM=1|2|...
  VAD=1|0
  MIN_SIL_MS=500

Examples:
  ./video2srt_fw.sh input.mp4
  ./video2srt_fw.sh --stream input.mkv
  MODEL=medium LANG=fr ./video2srt_fw.sh --stream input.webm
EOF
}

need ffmpeg
need python3

STREAM=0
if [[ "${1:-}" == "--stream" ]]; then
  STREAM=1
  shift
fi

IN="${1:-}"
[[ -n "${IN}" ]] || { usage; exit 1; }
[[ -f "${IN}" ]] || die "File not found: ${IN}"

# Parameters (env). Exported so the Python step below sees them; the Python
# defaults match these, so unset variables behave the same either way.
export MODEL="${MODEL:-small}"
export DEVICE="${DEVICE:-cuda}"
export COMPUTE="${COMPUTE:-float16}"
export BEAM="${BEAM:-1}"
export VAD="${VAD:-1}"
export MIN_SIL_MS="${MIN_SIL_MS:-500}"
# LANG is read directly by the Python step; it doubles as the system locale
# variable, so locale-style values (e.g. en_US.UTF-8) are normalized there.

IN_ABS="$(readlink -f "${IN}")"
DIR="$(dirname "${IN_ABS}")"
BASE="$(basename "${IN_ABS}")"
STEM="${BASE%.*}"
OUT_SRT="${DIR}/${STEM}.srt"

TMPDIR="$(mktemp -d)"
cleanup() { rm -rf "${TMPDIR}"; }
trap cleanup EXIT

# Extract mono 16 kHz PCM audio, the input format Whisper expects
AUDIO_WAV="${TMPDIR}/${STEM}.wav"
ffmpeg -y -i "${IN_ABS}" -vn -ac 1 -ar 16000 -c:a pcm_s16le "${AUDIO_WAV}" >/dev/null 2>&1

python3 - "${AUDIO_WAV}" "${OUT_SRT}" "${STREAM}" <<'PY'
import sys, os
from faster_whisper import WhisperModel

audio = sys.argv[1]
out_srt = sys.argv[2]
stream = sys.argv[3] == "1"

model_name = os.environ.get("MODEL", "small")
lang = os.environ.get("LANG", "auto")
# LANG doubles as the system locale variable; treat locale-style values
# (e.g. en_US.UTF-8) and C/POSIX as "auto" so the language is auto-detected
if "_" in lang or "." in lang or lang in ("C", "POSIX"):
    lang = "auto"
device = os.environ.get("DEVICE", "cuda")
compute = os.environ.get("COMPUTE", "float16")
beam = int(os.environ.get("BEAM", "1"))
vad = os.environ.get("VAD", "1") == "1"
min_sil_ms = int(os.environ.get("MIN_SIL_MS", "500"))

def srt_ts(t: float) -> str:
    """Format seconds as an SRT timestamp: HH:MM:SS,mmm."""
    if t < 0:
        t = 0.0
    ms = int(round(t * 1000.0))
    h = ms // 3600000
    ms -= h * 3600000
    m = ms // 60000
    ms -= m * 60000
    s = ms // 1000
    ms -= s * 1000
    return f"{h:02d}:{m:02d}:{s:02d},{ms:03d}"

wm = WhisperModel(model_name, device=device, compute_type=compute)
segments, info = wm.transcribe(
    audio,
    language=None if lang == "auto" else lang,
    vad_filter=vad,
    vad_parameters={"min_silence_duration_ms": min_sil_ms} if vad else None,
    beam_size=beam,
)

# Write each block as soon as it is decoded (flush), and optionally
# mirror it to stdout in --stream mode
with open(out_srt, "w", encoding="utf-8") as f:
    i = 1
    for seg in segments:
        text = (seg.text or "").strip()
        if not text:
            continue
        block = (
            f"{i}\n"
            f"{srt_ts(seg.start)} --> {srt_ts(seg.end)}\n"
            f"{text}\n\n"
        )
        f.write(block)
        f.flush()
        if stream:
            # plain, readable output, no ANSI escapes
            sys.stdout.write(block)
            sys.stdout.flush()
        i += 1

print(f"OK: wrote {out_srt}", file=sys.stderr)
PY

echo "Done: ${OUT_SRT}"
VIDEO2SRT_FW

Local conversion of any video file into SRT subtitles,
using faster-whisper with CUDA acceleration.
Optional streaming mode displays the subtitles as they
are generated.

No cloud. No API. No marketing. Just compute.
FEATURES

Local video -> SRT conversion (ffmpeg + faster-whisper)
CUDA acceleration, with CPU fallback
Optional --stream mode for real-time subtitle display
Fully configurable through environment variables
Temporary files cleaned up automatically
PREREQUISITES

SYSTEM

GPU check:

nvidia-smi

PYTHON

Installation:

pip install faster-whisper
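To confirm CUDA is visible from Python (ctranslate2 is installed as a
faster-whisper dependency; a count of 0 means no usable GPU):

python3 -c "import ctranslate2; print(ctranslate2.get_cuda_device_count())"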
INSTALLATION

Make the script executable:

chmod +x video2srt_fw.sh

No other configuration is required.
USAGE

BASIC USAGE

./video2srt_fw.sh video.mp4

Generates:

video.srt

in the same directory.
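To convert a whole directory, a simple loop works (a sketch; adjust the
glob to your file extensions):

for f in *.mp4; do ./video2srt_fw.sh "$f"; done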
STREAMING MODE (REAL-TIME DISPLAY)
./video2srt_fw.sh --stream video.mkv
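The blocks are flushed to stdout as they are decoded, so the output can be
piped; for example (live_subs.txt is just an example name):

./video2srt_fw.sh --stream video.mkv | tee live_subs.txt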
ENVIRONMENT VARIABLES

MODEL : small (default) | medium | large-v3
LANG : auto (default) | fr | en
DEVICE : cuda (default) | cpu
COMPUTE : float16 (default) | int8_float16 | int8
BEAM : 1 (fast)
VAD : 1 (enabled)
MIN_SIL_MS : 500
RTX 4090 EXAMPLE:
MODEL=medium LANG=fr BEAM=1 VAD=1 ./video2srt_fw.sh --stream video.mp4
USING WITH VLC

AUTOMATIC (RECOMMENDED)

VLC automatically loads a .srt file that sits next to the video
with the same base name.

Example:

video.mp4
video.srt

Launch:

vlc video.mp4

COMMAND LINE
vlc video.mp4 --sub-file=video.srt
ADJUSTING SUBTITLE DELAY

While playing, press g / h in VLC to decrease / increase the subtitle delay.

Or:

vlc video.mp4 --sub-delay=0.5
PERFORMANCE (INDICATIVE)

GPU: RTX 4090

small : ~10x to 15x real time
medium : ~4x to 6x
large-v3 : ~1x to 2x

VAD enabled = significant speedup.
TROUBLESHOOTING

STREAM MODE SHOWS NOTHING

The --stream flag must come before the file name:
./video2srt_fw.sh --stream video.mp4, not ./video2srt_fw.sh video.mp4 --stream.

CUDA NOT USED

Try:
COMPUTE=int8_float16 DEVICE=cuda ./video2srt_fw.sh video.mp4
Check:
pip show faster-whisper ctranslate2
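If CUDA still cannot be used, the script also runs on CPU with int8
quantization (much slower, but works everywhere):

DEVICE=cpu COMPUTE=int8 ./video2srt_fw.sh video.mp4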
TECHNICAL PIPELINE
Video
|
v
ffmpeg (mono / 16 kHz)
|
v
WAV audio
|
v
faster-whisper (CUDA)
|
v
Segments
|
v
SRT (file write + stream display)
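For reference, the audio extraction step the script runs is equivalent to
(with video.mp4 / audio.wav as placeholder names):

ffmpeg -y -i video.mp4 -vn -ac 1 -ar 16000 -c:a pcm_s16le audio.wav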
NOTES

The generated SRT can be reused in Ollama or another LLM.
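A minimal sketch (transcript.txt is an example name) to strip indices,
timestamps, and blank lines so the SRT becomes plain text for an LLM prompt:

grep -vE '^[0-9]+$' video.srt | grep -v -- '-->' | sed '/^$/d' > transcript.txt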
LICENSE

Free script.
Whisper / faster-whisper are subject to their respective licenses.