# mplekh/ytpbspeed.py

## ytpbspeed.py
import re
import numpy as np
from youtube_transcript_api import YouTubeTranscriptApi

def fetch_transcript(video_id: str):
    """Fetch the most suitable transcript available for a video.

    Candidates are tried in order of preference and the first one that can
    be fetched wins:

    1. a manually created English transcript;
    2. an auto-generated English transcript;
    3. any auto-generated transcript, translated to English;
    4. any transcript at all, in whatever language it is in.

    Errors raised by the initial ``list`` call are not caught here.

    Args:
        video_id: Video identifier passed to ``YouTubeTranscriptApi.list``.

    Returns:
        The result of ``fetch()`` on the first candidate that succeeds.

    Raises:
        RuntimeError: If every candidate fails. The last underlying error,
            if any, is attached as ``__cause__``.
    """
    api = YouTubeTranscriptApi()
    transcript_list = api.list(video_id)

    # Failures are caught as ``Exception`` rather than with a bare
    # ``except`` so KeyboardInterrupt/SystemExit are never swallowed.
    last_error = None

    # 1. Try manual English
    try:
        return transcript_list.find_manually_created_transcript(['en']).fetch()
    except Exception as exc:
        last_error = exc

    # 2. Try generated English
    try:
        return transcript_list.find_generated_transcript(['en']).fetch()
    except Exception as exc:
        last_error = exc

    # 3. Try any generated transcript and translate to English
    for t in transcript_list:
        if t.is_generated:
            try:
                return t.translate('en').fetch()
            except Exception as exc:
                last_error = exc

    # 4. Fallback: just take first available transcript (any language)
    for t in transcript_list:
        try:
            return t.fetch()
        except Exception as exc:
            last_error = exc

    # Keep the most recent failure attached so the reason is not lost.
    raise RuntimeError("No usable transcript found") from last_error

def count_words(text: str) -> int:
    """Return the number of words in *text*.

    A word is a run of word characters (``\\w``: letters, digits,
    underscore). An apostrophe, straight or typographic, that sits between
    word characters stays inside the word, so a contraction such as
    "don't" counts once rather than twice. All other punctuation separates
    words.

    Args:
        text: Text to tokenize; may be empty.

    Returns:
        The number of words found (0 for empty or punctuation-only text).
    """
    # ``\w+`` is greedy, so explicit ``\b`` anchors add nothing, and case
    # does not matter to ``\w``, so the text is not lower-cased first.
    # The non-capturing group absorbs apostrophe-joined parts ('t, 's, ...).
    return len(re.findall(r"\w+(?:['\u2019]\w+)*", text))


def compute_stats(transcript, pause_threshold=0.5):
    """Compute word-count and speaking-rate statistics for a transcript.

    Args:
        transcript: Iterable of segments exposing ``text``, ``start`` and
            ``duration`` attributes. Times are presumably in seconds (the
            rates below divide by 60 to get minutes) and segments are
            assumed to arrive ordered by start time.
        pause_threshold: A silence between segments counts as a pause only
            when it is strictly longer than this value.

    Returns:
        A dict with the keys ``words``, ``total_time_sec`` (first start to
        latest end), ``articulation_time_sec`` (total time minus pauses),
        ``raw_wpm`` and ``articulation_wpm``. A rate is reported as 0.0
        when its time span is not positive, which includes the empty
        transcript, instead of raising.
    """
    total_words = 0
    first_start = None
    last_end = 0.0
    total_gap_time = 0.0

    for seg in transcript:
        start = seg.start
        end = start + seg.duration
        total_words += count_words(seg.text)

        if first_start is None:
            first_start = start
        else:
            # Measure silence from the furthest end seen so far, not just
            # the previous segment's end: an earlier, longer segment may
            # still cover this stretch when segments overlap.
            gap = start - last_end
            if gap > pause_threshold:
                total_gap_time += gap

        last_end = max(last_end, end)

    # No segments at all: report an empty span rather than failing on None.
    total_time = 0.0 if first_start is None else last_end - first_start
    articulation_time = total_time - total_gap_time

    def _words_per_minute(seconds):
        # Guard the division: zero-length or degenerate spans yield 0.0.
        if seconds <= 0:
            return 0.0
        return total_words / (seconds / 60.0)

    return {
        "words": total_words,
        "total_time_sec": total_time,
        "articulation_time_sec": articulation_time,
        "raw_wpm": _words_per_minute(total_time),
        "articulation_wpm": _words_per_minute(articulation_time),
    }

def estimate_playback_factor(wpm, baseline=170):
    """Return the playback-speed multiplier that maps *wpm* onto *baseline*.

    The factor is ``baseline / wpm``: speech faster than the baseline gives
    a factor below 1 (slow the video down), slower speech gives a factor
    above 1 (speed it up).

    Args:
        wpm: Measured words per minute; must be positive.
        baseline: target 'natural' WPM; must be positive.

    Returns:
        The multiplier as a float.

    Raises:
        ValueError: If *wpm* or *baseline* is not positive, since no
            meaningful speed can be derived from such a rate.
    """
    if wpm <= 0:
        raise ValueError(f"wpm must be positive, got {wpm!r}")
    if baseline <= 0:
        raise ValueError(f"baseline must be positive, got {baseline!r}")
    return baseline / wpm


def analyze(video_id: str):
    """Fetch a video's transcript, print a speech-rate report, return stats.

    The report lists the word count, the total duration, the raw and
    articulation words-per-minute figures, and the playback speed suggested
    by each of those two rates.

    Args:
        video_id: Video identifier handed to ``fetch_transcript``.

    Returns:
        The statistics dict produced by ``compute_stats``.
    """
    stats = compute_stats(fetch_transcript(video_id))

    speed_from_raw = estimate_playback_factor(stats["raw_wpm"])
    speed_from_articulation = estimate_playback_factor(stats["articulation_wpm"])

    # One entry per printed line; a leading "\n" produces the blank
    # separator line in front of a section.
    report_lines = (
        "\n=== Speech Analysis ===",
        f"Total words: {stats['words']}",
        f"Total duration: {stats['total_time_sec']:.2f} sec",
        f"\nRaw WPM: {stats['raw_wpm']:.1f}",
        f"Articulation WPM: {stats['articulation_wpm']:.1f}",
        "\n--- Suggested Playback Speeds ---",
        f"Based on raw WPM: {speed_from_raw:.2f}x",
        f"Based on articulation: {speed_from_articulation:.2f}x",
    )
    for line in report_lines:
        print(line)

    return stats


if __name__ == "__main__":
    import sys

    # Analyze the video ID given as the first command-line argument; with
    # no argument, fall back to the built-in example.
    # example: https://www.youtube.com/watch?v=kwSVtQ7dziU
    video_id = sys.argv[1] if len(sys.argv) > 1 else "kwSVtQ7dziU"
    analyze(video_id)
	import re
	import numpy as np
	from youtube_transcript_api import YouTubeTranscriptApi

	def fetch_transcript(video_id: str):
	    """Fetch the most suitable transcript available for a video.

	    Candidates are tried in order of preference and the first one that can
	    be fetched wins:

	    1. a manually created English transcript;
	    2. an auto-generated English transcript;
	    3. any auto-generated transcript, translated to English;
	    4. any transcript at all, in whatever language it is in.

	    Errors raised by the initial ``list`` call are not caught here.

	    Args:
	        video_id: Video identifier passed to ``YouTubeTranscriptApi.list``.

	    Returns:
	        The result of ``fetch()`` on the first candidate that succeeds.

	    Raises:
	        RuntimeError: If every candidate fails. The last underlying error,
	            if any, is attached as ``__cause__``.
	    """
	    api = YouTubeTranscriptApi()
	    transcript_list = api.list(video_id)

	    # Failures are caught as ``Exception`` rather than with a bare
	    # ``except`` so KeyboardInterrupt/SystemExit are never swallowed.
	    last_error = None

	    # 1. Try manual English
	    try:
	        return transcript_list.find_manually_created_transcript(['en']).fetch()
	    except Exception as exc:
	        last_error = exc

	    # 2. Try generated English
	    try:
	        return transcript_list.find_generated_transcript(['en']).fetch()
	    except Exception as exc:
	        last_error = exc

	    # 3. Try any generated transcript and translate to English
	    for t in transcript_list:
	        if t.is_generated:
	            try:
	                return t.translate('en').fetch()
	            except Exception as exc:
	                last_error = exc

	    # 4. Fallback: just take first available transcript (any language)
	    for t in transcript_list:
	        try:
	            return t.fetch()
	        except Exception as exc:
	            last_error = exc

	    # Keep the most recent failure attached so the reason is not lost.
	    raise RuntimeError("No usable transcript found") from last_error

	def count_words(text: str) -> int:
	    """Return the number of words in *text*.

	    A word is a run of word characters (``\\w``: letters, digits,
	    underscore). An apostrophe, straight or typographic, that sits between
	    word characters stays inside the word, so a contraction such as
	    "don't" counts once rather than twice. All other punctuation separates
	    words.

	    Args:
	        text: Text to tokenize; may be empty.

	    Returns:
	        The number of words found (0 for empty or punctuation-only text).
	    """
	    # ``\w+`` is greedy, so explicit ``\b`` anchors add nothing, and case
	    # does not matter to ``\w``, so the text is not lower-cased first.
	    # The non-capturing group absorbs apostrophe-joined parts ('t, 's, ...).
	    return len(re.findall(r"\w+(?:['\u2019]\w+)*", text))


	def compute_stats(transcript, pause_threshold=0.5):
	    """Compute word-count and speaking-rate statistics for a transcript.

	    Args:
	        transcript: Iterable of segments exposing ``text``, ``start`` and
	            ``duration`` attributes. Times are presumably in seconds (the
	            rates below divide by 60 to get minutes) and segments are
	            assumed to arrive ordered by start time.
	        pause_threshold: A silence between segments counts as a pause only
	            when it is strictly longer than this value.

	    Returns:
	        A dict with the keys ``words``, ``total_time_sec`` (first start to
	        latest end), ``articulation_time_sec`` (total time minus pauses),
	        ``raw_wpm`` and ``articulation_wpm``. A rate is reported as 0.0
	        when its time span is not positive, which includes the empty
	        transcript, instead of raising.
	    """
	    total_words = 0
	    first_start = None
	    last_end = 0.0
	    total_gap_time = 0.0

	    for seg in transcript:
	        start = seg.start
	        end = start + seg.duration
	        total_words += count_words(seg.text)

	        if first_start is None:
	            first_start = start
	        else:
	            # Measure silence from the furthest end seen so far, not just
	            # the previous segment's end: an earlier, longer segment may
	            # still cover this stretch when segments overlap.
	            gap = start - last_end
	            if gap > pause_threshold:
	                total_gap_time += gap

	        last_end = max(last_end, end)

	    # No segments at all: report an empty span rather than failing on None.
	    total_time = 0.0 if first_start is None else last_end - first_start
	    articulation_time = total_time - total_gap_time

	    def _words_per_minute(seconds):
	        # Guard the division: zero-length or degenerate spans yield 0.0.
	        if seconds <= 0:
	            return 0.0
	        return total_words / (seconds / 60.0)

	    return {
	        "words": total_words,
	        "total_time_sec": total_time,
	        "articulation_time_sec": articulation_time,
	        "raw_wpm": _words_per_minute(total_time),
	        "articulation_wpm": _words_per_minute(articulation_time),
	    }

	def estimate_playback_factor(wpm, baseline=170):
	    """Return the playback-speed multiplier that maps *wpm* onto *baseline*.

	    The factor is ``baseline / wpm``: speech faster than the baseline gives
	    a factor below 1 (slow the video down), slower speech gives a factor
	    above 1 (speed it up).

	    Args:
	        wpm: Measured words per minute; must be positive.
	        baseline: target 'natural' WPM; must be positive.

	    Returns:
	        The multiplier as a float.

	    Raises:
	        ValueError: If *wpm* or *baseline* is not positive, since no
	            meaningful speed can be derived from such a rate.
	    """
	    if wpm <= 0:
	        raise ValueError(f"wpm must be positive, got {wpm!r}")
	    if baseline <= 0:
	        raise ValueError(f"baseline must be positive, got {baseline!r}")
	    return baseline / wpm


	def analyze(video_id: str):
	    """Fetch a video's transcript, print a speech-rate report, return stats.

	    The report lists the word count, the total duration, the raw and
	    articulation words-per-minute figures, and the playback speed suggested
	    by each of those two rates.

	    Args:
	        video_id: Video identifier handed to ``fetch_transcript``.

	    Returns:
	        The statistics dict produced by ``compute_stats``.
	    """
	    transcript = fetch_transcript(video_id)
	    stats = compute_stats(transcript)

	    raw_factor = estimate_playback_factor(stats["raw_wpm"])
	    articulation_factor = estimate_playback_factor(stats["articulation_wpm"])

	    print("\n=== Speech Analysis ===")
	    print(f"Total words: {stats['words']}")
	    print(f"Total duration: {stats['total_time_sec']:.2f} sec")

	    print(f"\nRaw WPM: {stats['raw_wpm']:.1f}")
	    print(f"Articulation WPM: {stats['articulation_wpm']:.1f}")

	    print("\n--- Suggested Playback Speeds ---")
	    print(f"Based on raw WPM: {raw_factor:.2f}x")
	    print(f"Based on articulation: {articulation_factor:.2f}x")

	    return stats


	if __name__ == "__main__":
	    import sys

	    # Analyze the video ID given as the first command-line argument; with
	    # no argument, fall back to the built-in example.
	    # example: https://www.youtube.com/watch?v=kwSVtQ7dziU
	    video_id = sys.argv[1] if len(sys.argv) > 1 else "kwSVtQ7dziU"
	    analyze(video_id)
No results found