Created
January 18, 2026 21:38
-
-
Save notexactlyawe/ae37f21b5b7000da79e27d39df9488f3 to your computer and use it in GitHub Desktop.
Download subtitles from YouTube playlist
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| #!/usr/bin/env python3 | |
| """ | |
| Download non-auto-generated English subtitles from a YouTube playlist. | |
| This script uses yt-dlp to download only manually created (non-auto-generated) | |
| English language subtitles for all videos in a YouTube playlist. | |
| """ | |
| import argparse | |
| import subprocess | |
| import sys | |
| import os | |
| import glob | |
def download_subtitles(
    playlist_url: str,
    output_dir: str = "subtitles",
    cookies_from_browser: str = None,
    cookies_file: str = None,
) -> bool:
    """
    Download non-auto-generated English subtitles from a YouTube playlist.

    Runs ``yt-dlp`` as a subprocess with video download disabled, asking only
    for manually created subtitles in any English variant. Files are written
    into ``output_dir`` as ``<title> [<id>].<ext>``.

    Args:
        playlist_url: URL of the YouTube playlist
        output_dir: Directory to save subtitles to (created if missing)
        cookies_from_browser: Browser name to extract cookies from
            (e.g., 'chrome', 'firefox'). Takes precedence over
            ``cookies_file`` when both are given.
        cookies_file: Path to a Netscape-format cookies file

    Returns:
        True if yt-dlp exited with status 0. False if it exited with a
        non-zero status, could not be started, or the output directory
        could not be created.
    """
    # Local import: only needed to render the command line for display.
    import shlex

    # Create output directory if it doesn't exist. A failure here (e.g. the
    # path is an existing file) is reported through the boolean return value
    # like every other failure, instead of escaping as an exception.
    try:
        os.makedirs(output_dir, exist_ok=True)
    except OSError as e:
        print(f"Error creating output directory {output_dir}: {e}")
        return False

    # '%' introduces a field in yt-dlp output templates, so literal percent
    # signs in the directory name must be doubled to survive unchanged.
    output_template = os.path.join(
        output_dir.replace("%", "%%"), "%(title)s [%(id)s].%(ext)s"
    )

    # Build yt-dlp command
    cmd = [
        "yt-dlp",
        "--write-subs",  # Write subtitle files
        "--no-write-auto-subs",  # Don't write auto-generated subtitles
        "--sub-langs",
        "en.*",  # All English variants (en, en-US, en-GB, etc.)
        "--skip-download",  # Don't download video, only subtitles
        "--sub-format",
        "srt/vtt/best",  # Prefer srt, then vtt, then best available
        "-o",
        output_template,
        "--ignore-errors",  # Continue on errors (some videos may not have manual subs)
        "--no-warnings",  # Suppress warnings for cleaner output
    ]

    # Add authentication if provided
    if cookies_from_browser:
        cmd.extend(["--cookies-from-browser", cookies_from_browser])
    elif cookies_file:
        cmd.extend(["--cookies", cookies_file])

    # "--" ends option parsing, so a URL or ID that begins with "-" is treated
    # as a positional argument rather than as a command-line flag.
    cmd.extend(["--", playlist_url])

    print(f"Downloading subtitles from: {playlist_url}")
    print(f"Output directory: {output_dir}")
    # Quote each argument so the echoed command is unambiguous (the output
    # template contains spaces and brackets).
    print(f"Running command: {' '.join(shlex.quote(part) for part in cmd)}")
    print("-" * 50)

    try:
        # The command is passed as a list (no shell), and check=False lets the
        # exit status be mapped to the boolean result below.
        result = subprocess.run(cmd, check=False)
    except FileNotFoundError:
        print(
            "Error: yt-dlp is not installed. Please install it with: pip install yt-dlp"
        )
        return False
    except Exception as e:
        # Boundary handler: any other launch failure is reported, not raised.
        print(f"Error running yt-dlp: {e}")
        return False
    return result.returncode == 0
def validate_subtitles(output_dir: str, search_term: str) -> bool:
    """
    Validate that subtitles were downloaded and contain the expected content.

    Looks for subtitle files directly inside ``output_dir`` (not in
    subdirectories) and performs a case-insensitive substring search for
    ``search_term`` in each of them. Progress and results are printed.

    Args:
        output_dir: Directory where subtitles were saved
        search_term: Text to search for in the subtitles

    Returns:
        True if search term is found in any subtitle file, False otherwise
        (including when no subtitle files exist in ``output_dir``)
    """
    # Find all subtitle files (common extensions). The directory is escaped so
    # glob metacharacters in its name ('[', ']', '*', '?') are matched
    # literally instead of being interpreted as a pattern.
    subtitle_extensions = ["*.srt", "*.vtt", "*.ass", "*.ssa", "*.sub"]
    safe_dir = glob.escape(output_dir)
    subtitle_files = []
    for ext in subtitle_extensions:
        subtitle_files.extend(glob.glob(os.path.join(safe_dir, ext)))
    # glob returns entries in arbitrary order; sort for stable output.
    subtitle_files.sort()

    if not subtitle_files:
        print(f"No subtitle files found in {output_dir}")
        return False

    print(f"\nFound {len(subtitle_files)} subtitle file(s):")
    for f in subtitle_files:
        print(f"  - {os.path.basename(f)}")

    print(f"\nSearching for '{search_term}' in subtitle files...")
    needle = search_term.lower()  # lowered once, outside the loop
    found_in_files = []
    for subtitle_file in subtitle_files:
        try:
            # Undecodable bytes are dropped rather than aborting the search.
            with open(subtitle_file, "r", encoding="utf-8", errors="ignore") as f:
                content = f.read()
        except OSError as e:
            # An unreadable file is reported and skipped; the search goes on.
            print(f"Error reading {subtitle_file}: {e}")
            continue
        if needle in content.lower():
            found_in_files.append(subtitle_file)

    if not found_in_files:
        print(f"\n✗ NOT FOUND: '{search_term}' was not found in any subtitle files")
        return False

    print(f"\n✓ SUCCESS: Found '{search_term}' in {len(found_in_files)} file(s):")
    for f in found_in_files:
        print(f"  - {os.path.basename(f)}")
    return True
def main(argv=None):
    """
    Command-line entry point: download subtitles, then report or validate.

    Parses the arguments, runs the download, and always terminates the
    process via ``sys.exit``:

    * with ``--validate TERM``: exit 0 if TERM is found in any downloaded
      subtitle file, 1 otherwise;
    * without it: exit 0 if at least one subtitle file exists in the output
      directory, 1 otherwise.

    A failed download is reported but does not by itself decide the exit
    status; the checks above still run.

    Args:
        argv: Argument list to parse instead of ``sys.argv[1:]``. ``None``
            (the default) keeps the normal command-line behaviour.
    """
    parser = argparse.ArgumentParser(
        description="Download non-auto-generated English subtitles from a YouTube playlist"
    )
    parser.add_argument(
        "playlist_url",
        nargs="?",
        default="https://www.youtube.com/playlist?list=PLIx8QniXH-rElLyjzNMSOXSTbOKsDShyu",
        help="URL of the YouTube playlist (default: test playlist)",
    )
    parser.add_argument(
        "-o",
        "--output-dir",
        default="subtitles",
        help="Directory to save subtitles (default: subtitles)",
    )
    parser.add_argument(
        "--cookies-from-browser",
        metavar="BROWSER",
        help="Browser to extract cookies from (e.g., chrome, firefox, edge, safari, brave)",
    )
    parser.add_argument(
        "--cookies", metavar="FILE", help="Path to Netscape-format cookies file"
    )
    parser.add_argument(
        "--validate",
        metavar="TERM",
        help="Search term to validate in downloaded subtitles",
    )
    args = parser.parse_args(argv)

    # Download subtitles
    success = download_subtitles(
        args.playlist_url,
        args.output_dir,
        cookies_from_browser=args.cookies_from_browser,
        cookies_file=args.cookies,
    )
    if not success:
        print("\nSubtitle download completed with errors.")

    # Validate if requested
    if args.validate:
        validation_result = validate_subtitles(args.output_dir, args.validate)
        sys.exit(0 if validation_result else 1)

    # If no validation term provided, just check if any files were downloaded.
    # The directory is escaped so glob metacharacters in its name are matched
    # literally rather than being interpreted as a pattern.
    subtitle_extensions = ["*.srt", "*.vtt", "*.ass", "*.ssa", "*.sub"]
    safe_dir = glob.escape(args.output_dir)
    subtitle_files = []
    for ext in subtitle_extensions:
        subtitle_files.extend(glob.glob(os.path.join(safe_dir, ext)))

    if subtitle_files:
        print(f"\n✓ Downloaded {len(subtitle_files)} subtitle file(s)")
        sys.exit(0)

    print("\n✗ No subtitle files were downloaded")
    print("Note: Not all videos have manually created subtitles available")
    sys.exit(1)
# Run the command-line interface only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment