Created
January 20, 2026 12:27
-
-
Save ppo/cf6c660e5e828cec611f3700728d370e to your computer and use it in GitHub Desktop.
Transcribe files (audio or video, to TXT or SRT). Using Whisper.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| #!/usr/bin/env python | |
| """Transcribe files (audio or video, to TXT or SRT). | |
| AI Models: https://github.com/openai/whisper | |
| Requirements: | |
| ``` | |
| brew install ffmpeg | |
| pip install openai-whisper | |
| ``` | |
| """ | |
| import argparse | |
| import time | |
| from datetime import timedelta | |
| from pathlib import Path | |
| import whisper | |
# Values accepted by the command-line options.
# Model list: https://github.com/openai/whisper?tab=readme-ov-file#available-models-and-languages
MODEL_CHOICES = [
    "tiny",
    "base",
    "small",
    "medium",
    "large",
    "turbo",
]
FORMAT_CHOICES = ["txt", "srt"]

# Values used when the corresponding option is omitted.
DEFAULT_MODEL = "large"
DEFAULT_FORMAT = "txt"
DEFAULT_LANGUAGE = "en"

# ANSI escape sequences used to color terminal output.
C_HEADING = "\033[1;33m"  # bold yellow
C_SUCCESS = "\033[1;32m"  # bold green
C_ERROR = "\033[1;31m"  # bold red
C_RESET = "\033[0m"  # back to the terminal's default attributes
def heading(text):
    """Print `text` wrapped in the heading color and reset escape codes."""
    print(C_HEADING, text, C_RESET, sep="")
def format_duration(seconds):
    """Format a duration given in seconds as `H:MM:SS`.

    Fractional seconds are truncated. The hour field is not zero-padded and
    is not capped at 24.

    Negative input is clamped to zero: a duration measured with a clock that
    was adjusted backwards would otherwise render with a negative hour field
    (for example `-1:59:59`).
    """
    whole_seconds = max(0, int(seconds))
    total_minutes, secs = divmod(whole_seconds, 60)
    hours, minutes = divmod(total_minutes, 60)
    return f"{hours}:{minutes:02d}:{secs:02d}"
def format_srt_timestamp(seconds):
    """Convert seconds to SRT timestamp format: `HH:MM:SS,mmm`.

    The value is first rounded to a whole number of milliseconds and then
    split with integer arithmetic. Extracting the fraction with
    `(seconds % 1) * 1000` and truncating loses a millisecond to binary
    floating-point error (1.001 would render as `00:00:01,000`).

    Negative input is clamped to `00:00:00,000`. The hour field grows beyond
    two digits when needed.
    """
    total_millis = max(0, round(seconds * 1000))
    total_secs, millis = divmod(total_millis, 1000)
    total_minutes, secs = divmod(total_secs, 60)
    hours, minutes = divmod(total_minutes, 60)
    return f"{hours:02d}:{minutes:02d}:{secs:02d},{millis:03d}"
def save_as_srt(result, output_path):
    """Write the segments of `result` to `output_path` as SRT cues.

    Each entry of `result["segments"]` supplies `start`, `end` and `text`.
    Cues are numbered from 1, the text is stripped of surrounding
    whitespace, and every cue is followed by a blank line.

    Returns `output_path`.
    """
    with open(output_path, "w", encoding="utf-8") as srt_file:
        for index, seg in enumerate(result["segments"], start=1):
            begin = format_srt_timestamp(seg["start"])
            finish = format_srt_timestamp(seg["end"])
            caption = seg["text"].strip()
            cue = "\n".join((str(index), f"{begin} --> {finish}", caption))
            srt_file.write(cue + "\n\n")
    return output_path
def save_as_txt(result, output_path):
    """Write the segments of `result` to `output_path` as plain text.

    Each entry of `result["segments"]` contributes one line: its `text`
    stripped of surrounding whitespace.

    Returns `output_path`, matching `save_as_srt` (previously this function
    returned None).
    """
    with open(output_path, "w", encoding="utf-8") as f:
        f.writelines(f"{segment['text'].strip()}\n" for segment in result["segments"])
    return output_path
def transcribe_file(path, model, language, format):
    """Transcribe `path` with `model` and save the result next to the input.

    For `format == "srt"` the output replaces the input suffix with
    `.<language>.srt`; any other value writes a `.txt` file.

    Returns the path of the file that was written.
    """
    transcription = model.transcribe(
        str(path),
        language=language,
        verbose=False,
        fp16=False,  # Suppress warning
    )

    if format != "srt":
        txt_path = path.with_suffix(".txt")
        save_as_txt(transcription, txt_path)
        return txt_path

    srt_path = path.with_suffix(f".{language}.srt")
    save_as_srt(transcription, srt_path)
    return srt_path
def parse_args(argv=None):
    """Parse the command-line options.

    Args:
        argv: Optional list of argument strings. When None (the default),
            argparse reads the process arguments.

    Returns:
        An `argparse.Namespace` with `files`, `model`, `format` and
        `language` attributes.
    """
    parser = argparse.ArgumentParser(description="Transcribe files (audio or video) using Whisper")
    parser.add_argument("files", nargs="+", help="Files to transcribe")
    parser.add_argument(
        "--model",
        default=DEFAULT_MODEL,
        choices=MODEL_CHOICES,
        help=f"Whisper model size (default: {DEFAULT_MODEL})",
    )
    parser.add_argument(
        "--format",
        choices=FORMAT_CHOICES,
        default=DEFAULT_FORMAT,
        # Derived from the constants so the help text cannot drift from them.
        help=f"Output format: {', '.join(FORMAT_CHOICES)} (default: {DEFAULT_FORMAT})",
    )
    parser.add_argument(
        "--lang",
        dest="language",
        default=DEFAULT_LANGUAGE,
        help=f"Audio language (default: {DEFAULT_LANGUAGE})",
    )
    return parser.parse_args(argv)
def main():
    """Transcribe every file named on the command line.

    The model is loaded once and reused for all files. Inputs that do not
    exist or are not regular files are reported and skipped; the remaining
    files are still processed.

    Returns:
        0 when every input was transcribed, 1 when at least one was skipped.
    """
    args = parse_args()
    print(f"Loading Whisper model '{args.model}'…")
    model = whisper.load_model(args.model)

    skipped = 0
    for file in args.files:
        path = Path(file)
        if not path.exists():
            print(f"{C_ERROR}Error: {file} not found{C_RESET}")
            skipped += 1
            continue
        if not path.is_file():
            # A directory passes exists() but cannot be transcribed.
            print(f"{C_ERROR}Error: {file} is not a file{C_RESET}")
            skipped += 1
            continue

        print()
        heading(f"Transcribing: {path.name}")
        # perf_counter is monotonic, so the elapsed time cannot go negative
        # if the system clock is adjusted while transcribing.
        start_time = time.perf_counter()
        output_path = transcribe_file(path, model, args.language, args.format)
        duration = time.perf_counter() - start_time
        duration_str = format_duration(duration)
        print(f" {C_SUCCESS}{args.format.upper()}: {output_path.name}{C_RESET}")
        print(f" Processed in {duration_str}")

    print()
    return 1 if skipped else 0


if __name__ == "__main__":
    # Propagate main()'s status so skipped inputs give a non-zero exit code.
    raise SystemExit(main())
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment