Created
December 19, 2024 11:14
-
-
Save christianbaumann/b42aec95a903d62b24cbc08f50f1763d to your computer and use it in GitHub Desktop.
A Python script that transcribes audio from a video file using OpenAI's Whisper model and saves the transcription as a text file.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| import whisper | |
| import ffmpeg | |
| import os | |
def main(video_file, language="en"):
    """Transcribe the speech in a video file and save it as a UTF-8 text file.

    The audio track is extracted with ffmpeg into a temporary WAV file,
    transcribed with the Whisper "base" model, and the resulting text is
    written to ``video_file + ".txt"``.

    Exceptions raised by ffmpeg, Whisper or file I/O are not caught here and
    propagate to the caller.

    Args:
        video_file: Path (string) of the video file to transcribe.
        language: Language code forwarded to the model's ``transcribe`` call.
    """
    # Imported here so this function does not rely on a file-level import.
    import tempfile

    print("Loading Whisper model...")
    model = whisper.load_model("base")

    # The suffix is appended, not substituted: "clip.mp4" -> "clip.mp4.txt".
    output_file = video_file + ".txt"

    # Extract into a private temporary directory instead of a fixed file name
    # in the working directory: this cannot overwrite an unrelated
    # "extracted_audio.wav", cannot collide with a concurrent run, and the
    # intermediate audio is removed even if extraction or transcription fails.
    with tempfile.TemporaryDirectory() as work_dir:
        audio_file = os.path.join(work_dir, "extracted_audio.wav")

        print("Extracting audio from video file...")
        ffmpeg.input(video_file).output(audio_file).run(
            overwrite_output=True, quiet=True
        )

        print("Transcribing audio...")
        result = model.transcribe(audio_file, language=language)

    print(f"Saving transcription to {output_file}...")
    with open(output_file, "w", encoding="utf-8") as f:
        f.write(result["text"])

    print("Transcription completed successfully.")
if __name__ == "__main__":
    # Script configuration: edit these two values before running.
    input_video = "video_to_trancribe.mp4"  # video file to transcribe
    language_code = "en"  # language code handed to the transcription step

    main(input_video, language_code)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment