Skip to content

Instantly share code, notes, and snippets.

@mplekh
Last active March 24, 2026 16:24
Show Gist options
  • Select an option

  • Save mplekh/df2ea417b3f04545c2e3e91f9148a118 to your computer and use it in GitHub Desktop.

Select an option

Save mplekh/df2ea417b3f04545c2e3e91f9148a118 to your computer and use it in GitHub Desktop.
Calculate a comfortable playback speed for YouTube videos
import re
import numpy as np
from youtube_transcript_api import YouTubeTranscriptApi
def fetch_transcript(video_id: str):
    """Fetch the best available transcript for *video_id*.

    Preference order:
      1. manually created English transcript
      2. auto-generated English transcript
      3. any auto-generated transcript, translated to English
      4. first fetchable transcript in any language

    Raises:
        RuntimeError: if no transcript can be fetched at all.
    """
    api = YouTubeTranscriptApi()
    transcript_list = api.list(video_id)

    # 1. Prefer a human-made English transcript.
    # NOTE: bare `except:` replaced with `except Exception` throughout so
    # KeyboardInterrupt/SystemExit are not silently swallowed.
    try:
        return transcript_list.find_manually_created_transcript(['en']).fetch()
    except Exception:
        pass
    # 2. Fall back to auto-generated English.
    try:
        return transcript_list.find_generated_transcript(['en']).fetch()
    except Exception:
        pass
    # 3. Try any generated transcript and translate it to English.
    for t in transcript_list:
        if t.is_generated:
            try:
                return t.translate('en').fetch()
            except Exception:
                continue
    # 4. Fallback: just take the first transcript that fetches (any language).
    for t in transcript_list:
        try:
            return t.fetch()
        except Exception:
            continue
    raise RuntimeError("No usable transcript found")
def count_words(text: str) -> int:
    """Count word tokens in *text* (case-folded, punctuation ignored)."""
    return len(re.findall(r"\b\w+\b", text.lower()))
def compute_stats(transcript, pause_threshold=0.5):
    """Compute word-count and speaking-rate statistics for a transcript.

    Args:
        transcript: iterable of segments exposing ``.text``, ``.start`` and
            ``.duration`` (seconds).
        pause_threshold: gaps between segments longer than this (seconds)
            are counted as pauses and excluded from articulation time.

    Returns:
        dict with keys ``words``, ``total_time_sec``,
        ``articulation_time_sec``, ``raw_wpm``, ``articulation_wpm``.

    Raises:
        ValueError: if the transcript contains no segments.
    """
    total_words = 0
    first_start = None
    last_end = 0.0
    total_gap_time = 0.0
    prev_end = None

    for seg in transcript:
        start = seg.start
        end = start + seg.duration
        total_words += count_words(seg.text)
        if first_start is None:
            first_start = start
        if prev_end is not None:
            gap = start - prev_end
            if gap > pause_threshold:  # long silence -> not articulation
                total_gap_time += gap
        prev_end = end
        last_end = max(last_end, end)

    # Fix: the original crashed with TypeError (last_end - None) here.
    if first_start is None:
        raise ValueError("transcript contains no segments")

    total_time = last_end - first_start
    articulation_time = total_time - total_gap_time
    # Fix: guard against ZeroDivisionError for zero-length transcripts
    # (e.g. a single zero-duration segment).
    raw_wpm = total_words * 60.0 / total_time if total_time > 0 else 0.0
    articulation_wpm = (
        total_words * 60.0 / articulation_time if articulation_time > 0 else 0.0
    )
    return {
        "words": total_words,
        "total_time_sec": total_time,
        "articulation_time_sec": articulation_time,
        "raw_wpm": raw_wpm,
        "articulation_wpm": articulation_wpm,
    }
def estimate_playback_factor(wpm, baseline=170):
    """Return the playback-speed multiplier that maps *wpm* onto *baseline*.

    baseline: target 'natural' WPM
    """
    return 1.0 / (wpm / baseline)
def analyze(video_id: str):
    """Fetch a video's transcript, print speech statistics, and return them."""
    segments = fetch_transcript(video_id)
    stats = compute_stats(segments)

    # Suggested multipliers for both rate measures.
    speed_from_raw = estimate_playback_factor(stats["raw_wpm"])
    speed_from_articulation = estimate_playback_factor(stats["articulation_wpm"])

    print("\n=== Speech Analysis ===")
    print(f"Total words: {stats['words']}")
    print(f"Total duration: {stats['total_time_sec']:.2f} sec")
    print(f"\nRaw WPM: {stats['raw_wpm']:.1f}")
    print(f"Articulation WPM: {stats['articulation_wpm']:.1f}")
    print("\n--- Suggested Playback Speeds ---")
    print(f"Based on raw WPM: {speed_from_raw:.2f}x")
    print(f"Based on articulation: {speed_from_articulation:.2f}x")
    return stats
# Script entry point: analyze a sample video when run directly.
if __name__ == "__main__":
    # example: https://www.youtube.com/watch?v=kwSVtQ7dziU
    video_id = "kwSVtQ7dziU"
    analyze(video_id)
@forero94
Copy link
Copy Markdown

Interesting

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment