Last active
March 24, 2026 16:24
-
-
Save mplekh/df2ea417b3f04545c2e3e91f9148a118 to your computer and use it in GitHub Desktop.
Calculate a comfortable playback speed for YouTube videos
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| import re | |
| import numpy as np | |
| from youtube_transcript_api import YouTubeTranscriptApi | |
def fetch_transcript(video_id: str):
    """Fetch the best available transcript for a YouTube video.

    Candidates are tried in order of preference and the first one that can
    be fetched is returned:

    1. a manually created English transcript,
    2. an auto-generated English transcript,
    3. any auto-generated transcript, translated to English,
    4. any transcript at all, in its original language.

    Args:
        video_id: The YouTube video identifier (the ``v=`` URL parameter).

    Returns:
        Whatever ``.fetch()`` returns for the chosen transcript.

    Raises:
        RuntimeError: If none of the candidates could be fetched. The last
            underlying failure is attached as ``__cause__``.
    """
    api = YouTubeTranscriptApi()
    transcript_list = api.list(video_id)
    last_error = None

    # NOTE: `except Exception` (not a bare `except:`) keeps the best-effort
    # fallback chain while letting KeyboardInterrupt / SystemExit propagate.

    # 1. Try manual English
    try:
        return transcript_list.find_manually_created_transcript(['en']).fetch()
    except Exception as exc:
        last_error = exc

    # 2. Try generated English
    try:
        return transcript_list.find_generated_transcript(['en']).fetch()
    except Exception as exc:
        last_error = exc

    # 3. Try any generated transcript and translate to English
    for t in transcript_list:
        if t.is_generated:
            try:
                return t.translate('en').fetch()
            except Exception as exc:
                last_error = exc

    # 4. Fallback: just take first available transcript (any language)
    for t in transcript_list:
        try:
            return t.fetch()
        except Exception as exc:
            last_error = exc

    # Chain the most recent failure so the real cause is visible in the
    # traceback instead of being silently discarded.
    raise RuntimeError("No usable transcript found") from last_error
def count_words(text: str) -> int:
    """Return the number of words in *text*.

    A word is a maximal run of word characters (letters, digits,
    underscore). An apostrophe (straight or typographic) that sits between
    word characters is treated as part of the word, so contractions such as
    "don't" count once rather than twice. All other punctuation separates
    or is ignored.
    """
    # No case folding: it does not affect the count, and str.lower() can
    # introduce combining marks (e.g. "İ" -> "i" + U+0307) that would split
    # a single word in two.
    return len(re.findall(r"\w+(?:['’]\w+)*", text))
def compute_stats(transcript, pause_threshold=0.5):
    """Compute word-count and speaking-rate statistics for a transcript.

    Args:
        transcript: Iterable of segments, each exposing ``text``, ``start``
            and ``duration`` attributes (times in seconds).
        pause_threshold: Minimum silence, in seconds, between the end of
            the speech seen so far and the start of the next segment for
            that silence to be counted as a pause.

    Returns:
        A dict with the keys ``words``, ``total_time_sec``,
        ``articulation_time_sec``, ``raw_wpm`` and ``articulation_wpm``.
        ``raw_wpm`` is measured over the whole span from the first segment
        start to the latest segment end; ``articulation_wpm`` excludes the
        counted pauses. A rate is reported as ``0.0`` when its time base is
        not positive (for example, an empty transcript).
    """
    total_words = 0
    first_start = None
    last_end = 0.0
    total_gap_time = 0.0

    for seg in transcript:
        start = seg.start
        end = start + seg.duration
        total_words += count_words(seg.text)

        if first_start is None:
            first_start = start
        else:
            # Measure silence from the furthest end seen so far, not from
            # the previous segment's end: a short segment overlapping an
            # earlier, longer one must not create a phantom pause.
            gap = start - last_end
            if gap > pause_threshold:
                total_gap_time += gap

        last_end = max(last_end, end)

    # An empty transcript has no time span at all.
    total_time = 0.0 if first_start is None else last_end - first_start
    articulation_time = total_time - total_gap_time

    # Guard the divisions: zero-length input yields 0 WPM instead of raising.
    raw_wpm = total_words / (total_time / 60.0) if total_time > 0 else 0.0
    articulation_wpm = (
        total_words / (articulation_time / 60.0)
        if articulation_time > 0
        else 0.0
    )

    return {
        "words": total_words,
        "total_time_sec": total_time,
        "articulation_time_sec": articulation_time,
        "raw_wpm": raw_wpm,
        "articulation_wpm": articulation_wpm,
    }
def estimate_playback_factor(wpm, baseline=170):
    """Return the playback-speed multiplier that maps *wpm* onto *baseline*.

    Args:
        wpm: Measured speaking rate in words per minute.
        baseline: Target 'natural' words-per-minute rate.

    Returns:
        The reciprocal of ``wpm / baseline``: above 1.0 when the speech is
        slower than the baseline, below 1.0 when it is faster.
    """
    return 1.0 / (wpm / baseline)
def analyze(video_id: str):
    """Fetch a video's transcript, print a speech-rate report, return stats.

    Args:
        video_id: The YouTube video identifier to analyze.

    Returns:
        The statistics dict produced by ``compute_stats``.
    """
    transcript = fetch_transcript(video_id)
    stats = compute_stats(transcript)

    def suggestion(wpm):
        # A transcript with no countable words yields 0 WPM; there is no
        # meaningful speed to suggest, and dividing by it would raise.
        if wpm <= 0:
            return "n/a"
        return f"{estimate_playback_factor(wpm):.2f}x"

    # Resolve both suggestions before printing so that any failure happens
    # before partial output is written.
    raw_suggestion = suggestion(stats["raw_wpm"])
    articulation_suggestion = suggestion(stats["articulation_wpm"])

    print("\n=== Speech Analysis ===")
    print(f"Total words: {stats['words']}")
    print(f"Total duration: {stats['total_time_sec']:.2f} sec")
    print(f"\nRaw WPM: {stats['raw_wpm']:.1f}")
    print(f"Articulation WPM: {stats['articulation_wpm']:.1f}")
    print("\n--- Suggested Playback Speeds ---")
    print(f"Based on raw WPM: {raw_suggestion}")
    print(f"Based on articulation: {articulation_suggestion}")
    return stats
if __name__ == "__main__":
    import sys

    # Usage: python <script> [VIDEO_ID]
    # With no argument, the example video is analyzed:
    # https://www.youtube.com/watch?v=kwSVtQ7dziU
    video_id = sys.argv[1] if len(sys.argv) > 1 else "kwSVtQ7dziU"
    analyze(video_id)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
Interesting