Skip to content

Instantly share code, notes, and snippets.

@sspaeti
Created November 27, 2025 10:24
Show Gist options
  • Select an option

  • Save sspaeti/c0b036defefaf1d042c2c44b5c49fbaa to your computer and use it in GitHub Desktop.

Select an option

Save sspaeti/c0b036defefaf1d042c2c44b5c49fbaa to your computer and use it in GitHub Desktop.
Creates a file listing the RSS feed URL and title of every subscribed YouTube channel, for adding to Newsboat or keeping as a backup.
#!/usr/bin/env python3
"""
Extract YouTube subscription data and create RSS feed URLs
"""
import json
import re
import html
from pathlib import Path
def extract_json_data(html_file):
    """Extract the ``ytInitialData`` JSON object from a saved HTML page.

    Args:
        html_file: Path of the saved HTML file (anything ``open`` accepts).

    Returns:
        The decoded ``ytInitialData`` object, or ``None`` when the page
        contains no ``var ytInitialData = {`` assignment.

    Raises:
        json.JSONDecodeError: If the text following the assignment is not
            valid JSON.
    """
    with open(html_file, 'r', encoding='utf-8') as f:
        content = f.read()

    # Match only the assignment; the lookahead leaves the opening brace
    # unconsumed so decoding can start exactly on it.
    match = re.search(r'var ytInitialData = (?=\{)', content)
    if match is None:
        return None

    # Let the JSON decoder find the end of the object. A non-greedy
    # ``{.*?};`` pattern stops at the first "};" it meets, which truncates
    # the payload whenever that sequence occurs inside a string value.
    data, _ = json.JSONDecoder().raw_decode(content, match.end())
    return data
def extract_subscriptions(data):
    """Collect the channels listed in a decoded ``ytInitialData`` object.

    Walks the first tab's ``sectionListRenderer`` -> ``itemSectionRenderer``
    -> ``shelfRenderer`` -> ``expandedShelfContentsRenderer`` and records
    every ``channelRenderer`` that carries a non-empty ``channelId``.

    Args:
        data: Decoded ``ytInitialData`` object (nested dicts and lists).

    Returns:
        A list of dicts with the keys ``channel_id``, ``title`` and
        ``handle``. The list is empty when the expected top-level structure
        is missing; sections that do not have the expected shape are
        reported on stdout and skipped.
    """
    malformed = (KeyError, IndexError, TypeError, AttributeError)
    subscriptions = []

    try:
        tab = data['contents']['twoColumnBrowseResultsRenderer']['tabs'][0]
        # list() forces the "is it iterable" check to happen inside the try.
        sections = list(
            tab['tabRenderer']['content']['sectionListRenderer']['contents']
        )
    except malformed as e:
        print(f"Error parsing data: {e}")
        return subscriptions

    for section in sections:
        # Each section is parsed on its own so that one malformed shelf does
        # not discard the channels found in the remaining sections.
        try:
            if 'itemSectionRenderer' not in section:
                continue
            for item in section['itemSectionRenderer']['contents']:
                if 'shelfRenderer' not in item:
                    continue
                shelf_items = item['shelfRenderer']['content']['expandedShelfContentsRenderer']['items']
                for channel_item in shelf_items:
                    if 'channelRenderer' not in channel_item:
                        continue
                    channel = channel_item['channelRenderer']
                    channel_id = channel.get('channelId', '')
                    if not channel_id:
                        continue
                    title = channel.get('title', {}).get('simpleText', '')
                    # NOTE(review): the handle is read from
                    # 'subscriberCountText'; presumably that is where the
                    # page data carries the @handle - confirm on a current page.
                    handle = channel.get('subscriberCountText', {}).get('simpleText', '')
                    subscriptions.append({
                        'channel_id': channel_id,
                        'title': title,
                        'handle': handle,
                    })
        except malformed as e:
            print(f"Error parsing data: {e}")

    return subscriptions
def format_rss_feed(channel_id, title, handle):
    """Format one feed line: ``<rss url> ! youtube "~<title> <@handle>"``.

    Args:
        channel_id: YouTube channel id placed in the feed URL.
        title: Channel title shown inside the quoted tag.
        handle: Channel handle; an ``@`` is prepended when missing. An empty
            (or ``None``) handle is left out of the tag altogether.

    Returns:
        The feed URL followed by `` ! `` and the tag string.
    """
    rss_url = f"https://www.youtube.com/feeds/videos.xml?channel_id={channel_id}"

    if handle:
        if not handle.startswith('@'):
            handle = f"@{handle}"
        label = f"{title} {handle}"
    else:
        # No handle known: avoid emitting a dangling "@" after the title.
        label = f"{title}"

    # The label sits inside double quotes, so a raw quote (or backslash) in
    # a channel title would end the quoted tag early. Escape backslashes
    # first so the escapes added for quotes are not doubled.
    label = label.replace('\\', '\\\\').replace('"', '\\"')

    tag = f'youtube "~{label}"'
    return f"{rss_url} ! {tag}"
def main():
    """Convert the saved subscriptions page into a text file of feed lines.

    Reads "All subscriptions - YouTube.html" from the script's directory,
    writes one formatted feed line per channel to
    "youtube_subscriptions_rss.txt" in the same directory, and prints a
    short progress report including the first five lines.
    """
    script_dir = Path(__file__).parent
    html_file = script_dir / "All subscriptions - YouTube.html"
    output_file = script_dir / "youtube_subscriptions_rss.txt"

    print(f"Reading: {html_file}")
    data = extract_json_data(html_file)
    if not data:
        print("Error: Could not extract JSON data from HTML file")
        return

    print("Extracting subscriptions...")
    subscriptions = extract_subscriptions(data)
    print(f"Found {len(subscriptions)} subscriptions")

    # One feed line per subscribed channel.
    rss_feeds = [
        format_rss_feed(entry['channel_id'], entry['title'], entry['handle'])
        for entry in subscriptions
    ]

    # Lines are newline-separated, with no trailing newline after the last.
    output_file.write_text('\n'.join(rss_feeds), encoding='utf-8')
    print(f"\nSaved {len(rss_feeds)} RSS feeds to: {output_file}")

    print("\nFirst 5 examples:")
    for line in rss_feeds[:5]:
        print(line)
# Run the extraction only when this file is executed as a script, not when
# it is imported as a module.
if __name__ == '__main__':
    main()
@sspaeti
Copy link
Author

sspaeti commented Nov 27, 2025

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment