Created
January 17, 2026 10:27
-
-
Save qoli/bac04246e5bf5c078f5c05003a100541 to your computer and use it in GitHub Desktop.
This is a minimal script I use to probe whether a runtime can handle JS + iframe + dynamic media pages.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| 'use strict' | |
| import fs from 'node:fs'; | |
| import os from 'node:os'; | |
| import path from 'node:path'; | |
| import puppeteer from 'puppeteer'; | |
| import puppeteerCore from 'puppeteer-core'; | |
// Baseline settings. parseArgs() copies this object and overlays CLI flags on top.
const DEFAULT_OPTIONS = {
  // Maximum number of trigger-playback + scan rounds.
  rounds: 6,
  // Pause between two rounds, in milliseconds.
  roundDelayMs: 1500,
  // How long one round waits for a media URL to show up, in milliseconds.
  roundTimeoutMs: 8000,
  // Optional CSS selector to wait for before the rounds start ('' = none).
  waitSelector: '',
  // Emit a JSON payload instead of one URL per line.
  json: false,
  // Custom user agent ('' = leave the browser default).
  userAgent: '',
  // Viewport applied to every page.
  viewportWidth: 1280,
  viewportHeight: 720,
  // 'headless-chrome' or 'lightpanda'.
  backend: 'headless-chrome',
  // CDP WebSocket endpoint to connect to ('' = launch a local browser).
  wsEndpoint: '',
  // Explicit browser binary ('' = auto-detect or let Puppeteer decide).
  executablePath: '',
  // Set to false by --headful.
  headless: true,
};

// Endpoint used for the lightpanda backend when --ws-endpoint is not given.
const DEFAULT_WS_ENDPOINT = 'ws://lightpanda.orb.local';

// Matches URLs whose path ends in .m3u8 or .mp4 (optionally followed by ? or #).
const MEDIA_RE = /\.(m3u8|mp4)([?#]|$)/i;
/**
 * Resolves (with no value) after `ms` milliseconds.
 * @param {number} ms delay in milliseconds
 * @returns {Promise<void>}
 */
const sleep = (ms) => new Promise((done) => {
  setTimeout(done, ms);
});

/**
 * Current time as an ISO-8601 string, used as the log-line prefix.
 * @returns {string}
 */
const nowStamp = () => {
  const current = new Date();
  return current.toISOString();
};
/**
 * Writes a timestamped progress line.
 *
 * Diagnostics go to stderr so that stdout carries only the results
 * (the media URL list, the NO_MEDIA_FOUND sentinel, or the --json payload)
 * and can be piped or parsed without stripping log lines.
 *
 * @param {string} message text to log
 * @returns {void}
 */
const logInfo = (message) => {
  console.error(`[${nowStamp()}] ${message}`);
};
/**
 * Parses CLI arguments into an options object.
 *
 * The first argument that is not a flag becomes `options.url`; later
 * positionals are ignored. Value flags accept both `--flag value` and
 * `--flag=value`. Invalid or missing values leave the default untouched.
 *
 * Differences from a naive prefix scan:
 *  - `-h` / `--help` is recognised even when it is the first argument
 *    (it is never mistaken for the URL);
 *  - flag names are matched exactly, not by prefix;
 *  - a value flag that is followed by another `--flag` (or by nothing) is
 *    treated as having no value instead of swallowing that flag.
 *
 * @param {string[]} argv arguments after the script name
 * @returns {object} DEFAULT_OPTIONS overlaid with parsed values, plus `url`
 *   and, when requested, `help: true`
 */
const parseArgs = (argv) => {
  const options = { ...DEFAULT_OPTIONS, url: '' };
  const args = [...argv];

  // Parses a base-10 integer and returns it only when it is >= minimum.
  const parseIntAtLeast = (raw, minimum) => {
    const value = Number.parseInt(raw, 10);
    return Number.isFinite(value) && value >= minimum ? value : undefined;
  };

  let index = 0;
  while (index < args.length) {
    const arg = String(args[index]);
    index += 1;

    // Checked before the positional rule so a leading `-h` is not taken as the URL.
    if (arg === '--help' || arg === '-h') {
      options.help = true;
      continue;
    }

    if (!arg.startsWith('--')) {
      if (!options.url) options.url = arg;
      continue;
    }

    const equalsAt = arg.indexOf('=');
    const name = equalsAt === -1 ? arg : arg.slice(0, equalsAt);

    // Returns the flag's value, consuming the next argument only when it is a real value.
    const takeValue = () => {
      if (equalsAt !== -1) return arg.slice(equalsAt + 1);
      const next = args[index];
      if (next === undefined || String(next).startsWith('--')) return undefined;
      index += 1;
      return next;
    };

    switch (name) {
      case '--rounds': {
        const value = parseIntAtLeast(takeValue(), 1);
        if (value !== undefined) options.rounds = value;
        break;
      }
      case '--round-delay': {
        const value = parseIntAtLeast(takeValue(), 0);
        if (value !== undefined) options.roundDelayMs = value;
        break;
      }
      case '--round-timeout': {
        const value = parseIntAtLeast(takeValue(), 1);
        if (value !== undefined) options.roundTimeoutMs = value;
        break;
      }
      case '--selector':
        options.waitSelector = takeValue() || '';
        break;
      case '--backend': {
        const value = (takeValue() || '').toLowerCase();
        if (value === 'lightpanda' || value === 'headless-chrome') {
          options.backend = value;
        }
        break;
      }
      case '--ws-endpoint':
        options.wsEndpoint = takeValue() || '';
        break;
      case '--executable-path':
        options.executablePath = takeValue() || '';
        break;
      case '--ua':
        options.userAgent = takeValue() || '';
        break;
      case '--json':
        options.json = true;
        break;
      case '--headful':
        options.headless = false;
        break;
      default:
        // Unknown flags are ignored.
        break;
    }
  }

  return options;
};
/**
 * Builds the CLI help text.
 *
 * Lists every flag parseArgs() understands, including -h/--help, and notes
 * that --ws-endpoint only takes effect with the lightpanda backend (main()
 * clears it for headless-chrome).
 *
 * @returns {string} multi-line usage text
 */
const usage = () => {
  const lines = [
    'Usage: node app.js <url> [options]',
    '',
    'Options:',
    '  --rounds <n>              number of trigger+scan rounds (default: 6)',
    '  --round-delay <ms>        delay between rounds (default: 1500)',
    '  --round-timeout <ms>      wait per round for media (m3u8/mp4) (default: 8000)',
    '  --selector <css>          wait for selector before rounds',
    '  --backend <name>          headless-chrome or lightpanda (default: headless-chrome)',
    '  --ws-endpoint <url>       connect to remote CDP endpoint (lightpanda backend only)',
    '  --executable-path <path>  launch with specific Chrome/Chromium binary',
    '  --ua <string>             custom user agent',
    '  --json                    output JSON with sources',
    '  --headful                 run with visible browser window',
    '  -h, --help                show this help',
  ];
  return lines.join('\n');
};
/**
 * Turns user input into a navigable URL.
 *
 *  - empty / whitespace-only input        -> ''
 *  - input that already has `scheme://`   -> returned trimmed, unchanged
 *    (http, https, and also e.g. file:// for local test pages)
 *  - protocol-relative `//host/path`      -> `https://host/path`
 *  - anything else (bare host, host:port) -> prefixed with `https://`
 *
 * @param {*} value raw URL-ish input
 * @returns {string} normalized URL, or '' when there is nothing usable
 */
const normalizeUrl = (value) => {
  if (!value) return '';
  const trimmed = String(value).trim();
  if (!trimmed) return '';
  // Requires "://" so that "localhost:3000" is still treated as a bare host.
  if (/^[a-z][a-z0-9+.-]*:\/\//i.test(trimmed)) return trimmed;
  if (trimmed.startsWith('//')) return `https:${trimmed}`;
  return `https://${trimmed}`;
};
/**
 * Creates an in-memory collection of discovered media URLs, keyed by URL.
 *
 * `record(maybeUrl, source)` ignores anything that, once trimmed, does not
 * match MEDIA_RE. For a match it creates the entry on first sight (stamping
 * `firstSeen`) and merges the `source` label and media type into the
 * entry's sets on every sighting.
 *
 * The value is trimmed BEFORE it is tested, so a URL carrying trailing
 * whitespace is still recognised as ending in .m3u8/.mp4.
 *
 * @returns {{record: function(*, string=): void, entries: function(): object[], size: function(): number}}
 */
const createResultStore = () => {
  const store = new Map();

  const record = (maybeUrl, source) => {
    if (!maybeUrl) return;
    const normalized = String(maybeUrl).trim();
    // One match both validates the URL and yields its type (capture group 1).
    const typeMatch = MEDIA_RE.exec(normalized);
    if (!typeMatch) return;
    const mediaType = typeMatch[1].toLowerCase();

    let entry = store.get(normalized);
    if (!entry) {
      entry = {
        url: normalized,
        sources: new Set(),
        types: new Set(),
        firstSeen: Date.now(),
      };
      store.set(normalized, entry);
    }
    if (source) entry.sources.add(source);
    entry.types.add(mediaType);
  };

  return {
    record,
    entries: () => Array.from(store.values()),
    size: () => store.size,
  };
};
// Module-level session state shared by the helpers below.

// Current Puppeteer browser handle (launched or connected).
let browser;
// Page currently used for navigation and scanning.
let page;
// Store created by createResultStore() for this run.
let resultStore;
// Shortcut to resultStore.record.
let recordResult;
// True when `browser` came from connect(); teardown then disconnects instead of closing.
let browserIsRemote = false;
// Puppeteer flavour selected for the current backend.
let activePuppeteer = puppeteer;
/**
 * Picks the Puppeteer package for the requested backend:
 * `puppeteer-core` for lightpanda, the full `puppeteer` package otherwise.
 *
 * @param {{backend: string}} options parsed CLI options
 * @returns {object} the Puppeteer module to use
 */
const resolvePuppeteerForOptions = (options) => (
  options.backend === 'lightpanda' ? puppeteerCore : puppeteer
);
/**
 * Reports whether `value` names an existing regular file.
 * Any stat failure (missing path, bad argument, permissions) counts as "no".
 *
 * @param {string} value path to check
 * @returns {boolean}
 */
const fileExists = (value) => {
  let isFile = false;
  try {
    const stats = fs.statSync(value);
    isFile = stats.isFile();
  } catch {
    isFile = false;
  }
  return isFile;
};
/**
 * Reports whether `value` names an existing directory.
 * Any stat failure (missing path, bad argument, permissions) counts as "no".
 *
 * @param {string} value path to check
 * @returns {boolean}
 */
const dirExists = (value) => {
  let isDirectory = false;
  try {
    const stats = fs.statSync(value);
    isDirectory = stats.isDirectory();
  } catch {
    isDirectory = false;
  }
  return isDirectory;
};
/**
 * Lists the directories to search for Puppeteer browser downloads,
 * de-duplicated and in priority order:
 *   1. $PUPPETEER_CACHE_DIR, when set
 *   2. <home>/.cache/puppeteer
 *   3. <home>/Library/Caches/puppeteer (macOS only)
 *
 * @returns {string[]}
 */
const getDefaultCacheDirs = () => {
  // A Set keeps insertion order while dropping repeats.
  const unique = new Set();

  const { PUPPETEER_CACHE_DIR: envDir } = process.env;
  if (envDir) unique.add(envDir);

  const home = os.homedir();
  if (home) {
    unique.add(path.join(home, '.cache', 'puppeteer'));
    if (process.platform === 'darwin') {
      unique.add(path.join(home, 'Library', 'Caches', 'puppeteer'));
    }
  }

  return [...unique];
};
/**
 * Searches the Puppeteer cache directories for a downloaded
 * `chrome-headless-shell` binary and returns the most recently modified one.
 *
 * Layouts checked under each `<cache>/chrome-headless-shell/`:
 *   <version>/<exe>            (binary directly in the version directory)
 *   <version>/<subdir>/<exe>   (binary one level deeper)
 *
 * Modification times are read once per path and cached, so the sort
 * comparator is consistent and does not hit the filesystem on every
 * comparison. Paths whose stat fails are treated as oldest (mtime 0).
 *
 * @returns {string} absolute path of the newest binary, or '' when none is found
 */
const findHeadlessShellExecutable = () => {
  const exeName = process.platform === 'win32'
    ? 'chrome-headless-shell.exe'
    : 'chrome-headless-shell';

  // Immediate subdirectories of `dir` as full paths; [] when `dir` cannot be read.
  const listSubdirs = (dir) => {
    try {
      return fs.readdirSync(dir, { withFileTypes: true })
        .filter((entry) => entry.isDirectory())
        .map((entry) => path.join(dir, entry.name));
    } catch (_) {
      // Best-effort discovery: an unreadable directory just contributes nothing.
      return [];
    }
  };

  // Cached mtime lookup (0 when the path cannot be stat'ed).
  const mtimeCache = new Map();
  const mtimeOf = (target) => {
    if (!mtimeCache.has(target)) {
      let mtimeMs = 0;
      try {
        mtimeMs = fs.statSync(target).mtimeMs;
      } catch (_) {
        mtimeMs = 0;
      }
      mtimeCache.set(target, mtimeMs);
    }
    return mtimeCache.get(target);
  };
  const newestFirst = (a, b) => mtimeOf(b) - mtimeOf(a);

  const roots = getDefaultCacheDirs()
    .map((dir) => path.join(dir, 'chrome-headless-shell'))
    .filter(dirExists);

  const candidates = [];
  for (const root of roots) {
    // Newest version directory first, so ties in the final sort favour it.
    const versionDirs = listSubdirs(root).sort(newestFirst);
    for (const versionDir of versionDirs) {
      const direct = path.join(versionDir, exeName);
      if (fileExists(direct)) {
        candidates.push(direct);
        continue;
      }
      for (const subdir of listSubdirs(versionDir)) {
        const nested = path.join(subdir, exeName);
        if (fileExists(nested)) {
          candidates.push(nested);
        }
      }
    }
  }

  candidates.sort(newestFirst);
  return candidates[0] || '';
};
/**
 * Obtains a browser for the given options.
 *
 * With `options.wsEndpoint` set, connects to that CDP endpoint and marks the
 * session as remote; otherwise launches a local browser, passing
 * `executablePath` and `headless: false` only when they were requested.
 * Also updates the module-level `activePuppeteer` and `browserIsRemote`.
 *
 * @param {object} options parsed CLI options
 * @returns {Promise<object>} the Puppeteer browser handle
 */
const connectBrowser = async (options) => {
  activePuppeteer = resolvePuppeteerForOptions(options);

  const { wsEndpoint, executablePath, headless } = options;
  browserIsRemote = Boolean(wsEndpoint);
  if (browserIsRemote) {
    return activePuppeteer.connect({ browserWSEndpoint: wsEndpoint });
  }

  const launchOptions = {
    ...(executablePath ? { executablePath } : {}),
    ...(headless === false ? { headless: false } : {}),
  };
  return activePuppeteer.launch(launchOptions);
};
/**
 * Prepares the current module-level `page`: applies the viewport and the
 * optional user agent, then replaces any existing request/response listeners
 * with ones that feed every URL into `recordResult`.
 * Does nothing when there is no page.
 *
 * @param {object} options parsed CLI options (viewportWidth, viewportHeight, userAgent)
 * @returns {Promise<void>}
 */
const setupPage = async (options) => {
  if (!page) return;

  const { viewportWidth: width, viewportHeight: height, userAgent } = options;
  await page.setViewport({ width, height });
  if (userAgent) {
    await page.setUserAgent(userAgent);
  }

  const networkEvents = ['request', 'response'];
  // Drop old listeners first so repeated setup never double-records.
  for (const eventName of networkEvents) {
    page.removeAllListeners(eventName);
  }
  for (const eventName of networkEvents) {
    page.on(eventName, (message) => {
      recordResult(message.url(), eventName);
    });
  }
};
/**
 * Replaces the module-level `page` with a fresh one from the current
 * browser and configures it via setupPage().
 *
 * The page being replaced is closed first so that retries do not accumulate
 * abandoned pages. Closing is best-effort: the old page may already be
 * closed or may belong to a browser that is gone, and neither case should
 * block the retry.
 *
 * @param {object} options parsed CLI options, forwarded to setupPage()
 * @returns {Promise<void>}
 */
const newPage = async (options) => {
  const previousPage = page;
  if (previousPage) {
    try {
      await previousPage.close();
    } catch (_) {
      // Deliberately ignored: the page is being discarded anyway.
    }
  }
  page = await browser.newPage();
  await setupPage(options);
};
/**
 * Drops the current browser session and builds a new one.
 *
 * A remote browser is disconnected, a launched one is closed; failures while
 * doing so are ignored because the session is being abandoned anyway. Then a
 * new browser is obtained via connectBrowser() and a new page via newPage().
 *
 * @param {object} options parsed CLI options
 * @returns {Promise<void>}
 */
const reconnect = async (options) => {
  if (browser) {
    const release = browserIsRemote
      ? () => browser.disconnect()
      : () => browser.close();
    try {
      await release();
    } catch (_) {
      // Ignore disconnect failures.
    }
  }

  logInfo('Reconnecting to browser...');
  browser = await connectBrowser(options);
  await newPage(options);
};
/**
 * Navigates the current page to `targetUrl`, trying twice: first waiting for
 * 'domcontentloaded', then for 'load' (45s timeout each).
 *
 * Recovery between attempts depends on the failure message:
 *  - "connection closed": reconnect the browser;
 *  - detached frame: if `body` still appears within 5s the navigation is
 *    treated as successful; otherwise open a fresh page, falling back to a
 *    full reconnect when that fails;
 *  - anything else: just retry.
 *
 * A 1s back-off is applied only when another attempt will actually follow;
 * after the final failure the error is thrown without further delay.
 *
 * @param {string} targetUrl URL to open
 * @param {object} options parsed CLI options, forwarded to reconnect()/newPage()
 * @returns {Promise<void>}
 * @throws the error of the last failed attempt
 */
const gotoWithRetry = async (targetUrl, options) => {
  const attempts = [
    { waitUntil: 'domcontentloaded', timeout: 45000 },
    { waitUntil: 'load', timeout: 45000 },
  ];
  let lastError;

  for (let i = 0; i < attempts.length; i += 1) {
    const hasMoreAttempts = i < attempts.length - 1;
    // Waiting is only useful if we are going to try again.
    const backOff = async () => {
      if (hasMoreAttempts) await sleep(1000);
    };

    try {
      logInfo(`Navigating (attempt ${i + 1}) to: ${targetUrl}`);
      await page.goto(targetUrl, attempts[i]);
      return;
    } catch (error) {
      lastError = error;
      const message = String(error?.message || '').toLowerCase();

      if (message.includes('connection closed')) {
        await reconnect(options);
        await backOff();
        continue;
      }

      if (message.includes('detached frame') || message.includes('frame was detached')) {
        try {
          await page.waitForSelector('body', { timeout: 5000 });
          return;
        } catch (_) {
          // If the frame is really gone, try a fresh page/context.
        }
        try {
          await newPage(options);
        } catch (_) {
          await reconnect(options);
        }
        await backOff();
        continue;
      }

      await backOff();
    }
  }

  throw lastError;
};
/**
 * Waits until the result store holds at least one media URL, or until
 * `timeoutMs` elapses, whichever comes first. Never rejects.
 *
 * Resolves immediately when the store is already non-empty. Otherwise it
 * listens to both `request` and `response` events on the current page,
 * records each URL, and resolves as soon as one of them is a media URL, so a
 * media request ends the wait without having to wait for a later response.
 * Listeners are removed from the same page object they were added to, even
 * if the module-level `page` is replaced in the meantime.
 *
 * @param {number} timeoutMs maximum time to wait, in milliseconds
 * @returns {Promise<void>}
 */
const waitForMedia = (timeoutMs) => new Promise((resolve) => {
  const hasMedia = () => Boolean(resultStore && resultStore.size() > 0);
  if (hasMedia()) {
    resolve();
    return;
  }

  const watchedPage = page;
  let timeoutId;

  const finish = () => {
    clearTimeout(timeoutId);
    watchedPage.off('request', onRequest);
    watchedPage.off('response', onResponse);
    resolve();
  };
  const onRequest = (request) => {
    recordResult(request.url(), 'request');
    if (hasMedia()) finish();
  };
  const onResponse = (response) => {
    recordResult(response.url(), 'response');
    if (hasMedia()) finish();
  };

  timeoutId = setTimeout(finish, timeoutMs);
  watchedPage.on('request', onRequest);
  watchedPage.on('response', onResponse);
});
/**
 * Tries to start playback inside one frame. Strategies, in order:
 *   1. mute and play() the first <video> element;
 *   2. click the first element matching a list of known player play buttons;
 *   3. click an element whose text mentions "play" / "播放".
 *
 * For step 3 the match with the shortest text is clicked. A container's
 * innerText includes the text of everything inside it, so the first match in
 * document order is usually a large wrapper <div> rather than the button;
 * the shortest text identifies the most specific element. On equal length
 * the later (deeper) element wins.
 *
 * The callback passed to frame.evaluate() runs in the page, so it must stay
 * self-contained (no references to Node-side variables).
 *
 * @param {object|null|undefined} frame Puppeteer frame
 * @returns {Promise<boolean>} true when something was played or clicked;
 *   false when nothing matched, the frame is missing, or evaluation failed
 */
const triggerPlaybackInFrame = async (frame) => {
  if (!frame) return false;
  try {
    return await frame.evaluate(() => {
      const tryPlayVideo = () => {
        const video = document.querySelector('video');
        if (!video) return false;
        // Muted playback is far less likely to be blocked by autoplay policy.
        video.muted = true;
        const playPromise = video.play();
        if (playPromise && typeof playPromise.catch === 'function') {
          // A rejected play() is expected on some pages; later rounds retry.
          playPromise.catch(() => {});
        }
        return true;
      };
      if (tryPlayVideo()) return true;

      const clickableSelectors = [
        '.dplayer-play-icon',
        '.dplayer-play-button',
        '.vjs-big-play-button',
        '.play',
        '.player-play',
        '.btn-play',
        'button[aria-label="Play"]',
      ];
      for (const selector of clickableSelectors) {
        const el = document.querySelector(selector);
        if (el) {
          el.click();
          return true;
        }
      }

      const textOf = (el) => el.innerText || '';
      const labelled = Array.from(document.querySelectorAll('button, a, div'))
        .filter((el) => /播放|play/i.test(textOf(el)));
      if (labelled.length === 0) return false;

      const mostSpecific = labelled.reduce(
        (best, el) => (textOf(el).length <= textOf(best).length ? el : best),
        labelled[0],
      );
      mostSpecific.click();
      return true;
    });
  } catch (_) {
    // The frame may have been detached or navigated away; treat as "nothing triggered".
    return false;
  }
};
/**
 * Runs triggerPlaybackInFrame() on every frame of the current page
 * concurrently and waits for all of them to finish.
 *
 * @returns {Promise<void>}
 */
const triggerPlaybackInAllFrames = async () => {
  const pending = [];
  for (const frame of page.frames()) {
    pending.push(triggerPlaybackInFrame(frame));
  }
  await Promise.all(pending);
};
/**
 * Collects .m3u8/.mp4 URLs visible from inside one frame.
 *
 * On every call it gathers URLs from:
 *   - resource timing entries,
 *   - `src` and `currentSrc` of <video>/<source> elements,
 *   - absolute http(s) URLs embedded in the document HTML,
 *   - URLs captured by hooks installed on a previous call.
 *
 * On the first call per document it wraps `fetch` and
 * `XMLHttpRequest.prototype.open`; on every call it wraps
 * `Hls.prototype.loadSource` if hls.js has appeared since and is not yet
 * wrapped. Hooked URLs accumulate in `window.__m3u8Captured`. The hooks
 * accept strings, `URL` objects and `Request` objects, matching what
 * fetch/XHR accept.
 *
 * The callback passed to frame.evaluate() runs in the page, so it must stay
 * self-contained (no references to Node-side variables).
 *
 * @param {object|null|undefined} frame Puppeteer frame
 * @returns {Promise<string[]>} unique media URLs; [] when the frame is
 *   missing or evaluation fails
 */
const scanFrameForMedia = async (frame) => {
  if (!frame) return [];
  try {
    return await frame.evaluate(() => {
      const isMediaUrl = (text) => /\.(m3u8|mp4)([?#]|$)/i.test(text);

      // Normalizes string | Request-like ({url}) | URL-like ({href}) to a string ('' if none).
      const toUrlString = (value) => {
        if (typeof value === 'string') return value;
        if (value && typeof value.url === 'string') return value.url;
        if (value && typeof value.href === 'string') return value.href;
        return '';
      };

      const results = new Set();
      const recordLocal = (value) => {
        const text = toUrlString(value);
        if (text && isMediaUrl(text)) results.add(text);
      };

      if (!window.__m3u8Captured) {
        window.__m3u8Captured = [];
      }
      if (!window.__m3u8Record) {
        // Lives on window so hooks installed now keep working on later scans.
        window.__m3u8Record = (value) => {
          const text = toUrlString(value);
          if (text && isMediaUrl(text) && !window.__m3u8Captured.includes(text)) {
            window.__m3u8Captured.push(text);
          }
        };
      }
      const recordPersistent = window.__m3u8Record;

      if (!window.__m3u8HooksInstalled) {
        window.__m3u8HooksInstalled = true;

        if (window.fetch && !window.__m3u8FetchWrapped) {
          window.__m3u8FetchWrapped = true;
          const originalFetch = window.fetch.bind(window);
          window.fetch = (...args) => {
            try {
              recordPersistent(args[0]);
            } catch (_) {
              // Recording must never break the page's own request.
            }
            return originalFetch(...args);
          };
        }

        if (window.XMLHttpRequest && !XMLHttpRequest.prototype.__m3u8Wrapped) {
          XMLHttpRequest.prototype.__m3u8Wrapped = true;
          const originalOpen = XMLHttpRequest.prototype.open;
          XMLHttpRequest.prototype.open = function (method, url, ...rest) {
            try {
              recordPersistent(url);
            } catch (_) {
              // Recording must never break the page's own request.
            }
            return originalOpen.call(this, method, url, ...rest);
          };
        }
      }

      // Checked on every scan: hls.js is often loaded after the first scan.
      if (window.Hls && !window.Hls.__m3u8Wrapped) {
        window.Hls.__m3u8Wrapped = true;
        const originalLoadSource = window.Hls.prototype?.loadSource;
        if (originalLoadSource) {
          window.Hls.prototype.loadSource = function (url) {
            try {
              recordPersistent(url);
            } catch (_) {
              // Recording must never break the player.
            }
            return originalLoadSource.call(this, url);
          };
        }
      }

      if (typeof performance !== 'undefined') {
        performance.getEntriesByType('resource')
          .forEach((entry) => recordLocal(entry.name));
      }

      document.querySelectorAll('video, source').forEach((el) => {
        recordLocal(el.src);
        // currentSrc reflects the source the player actually selected.
        recordLocal(el.currentSrc);
      });

      const html = document.documentElement ? document.documentElement.innerHTML : '';
      const matches = html.match(/https?:\/\/[^"'\\\s]+\.(?:m3u8|mp4)[^"'\\\s]*/gi) || [];
      matches.forEach((match) => recordLocal(match));

      if (Array.isArray(window.__m3u8Captured)) {
        window.__m3u8Captured.forEach((value) => recordLocal(value));
      }

      return Array.from(results);
    });
  } catch (_) {
    // The frame may have been detached or navigated away; report nothing for it.
    return [];
  }
};
/**
 * Scans every frame of the current page concurrently and records each
 * discovered URL in the result store with the source label 'scan'.
 *
 * @returns {Promise<void>}
 */
const scanAllFrames = async () => {
  const perFrame = await Promise.all(
    page.frames().map((frame) => scanFrameForMedia(frame)),
  );
  for (const url of perFrame.flat()) {
    recordResult(url, 'scan');
  }
};
/**
 * Gives the page a chance to settle before the playback rounds start.
 *
 * Waits, each for up to 30s and each best-effort, for:
 *   1. document.readyState to become 'complete',
 *   2. the network to be idle for 1s,
 *   3. `options.waitSelector` to appear (only when one was given).
 * A step that times out or fails is skipped; this function never throws
 * because of it.
 *
 * @param {{waitSelector: string}} options parsed CLI options
 * @returns {Promise<void>}
 */
const waitForDomAndNetwork = async (options) => {
  // Runs one wait and swallows its failure: pages that never settle must not abort the probe.
  const tolerate = async (startWait) => {
    try {
      await startWait();
    } catch (_) {
      // Continue anyway.
    }
  };

  logInfo('Waiting for DOM ready and network idle...');
  await tolerate(() => page.waitForFunction(
    () => document.readyState === 'complete',
    { timeout: 30000 },
  ));
  await tolerate(() => page.waitForNetworkIdle({ idleTime: 1000, timeout: 30000 }));

  if (!options.waitSelector) return;

  logInfo(`Waiting for selector: ${options.waitSelector}`);
  await tolerate(() => page.waitForSelector(options.waitSelector, { timeout: 30000 }));
};
/**
 * CLI entry point: parses arguments, opens the target page, runs up to
 * `options.rounds` rounds of "trigger playback -> wait -> scan frames", and
 * prints what was found.
 *
 * Output on stdout:
 *   - with --json: one JSON document { url, count, results[] };
 *   - otherwise: one media URL per line, or NO_MEDIA_FOUND.
 *
 * Exit code is set to 1 when the URL is missing or invalid. Asking for help
 * is not an error, even without a URL.
 *
 * The browser is acquired inside the try block so that the finally block
 * releases it (disconnect for remote, close for launched) even when page
 * creation or navigation fails.
 *
 * @returns {Promise<void>}
 */
const main = async () => {
  const options = parseArgs(process.argv.slice(2));
  if (options.help || !options.url) {
    console.log(usage());
    if (!options.help) process.exitCode = 1;
    return;
  }

  options.url = normalizeUrl(options.url);
  if (!options.url) {
    console.log('Invalid URL');
    process.exitCode = 1;
    return;
  }

  logInfo(`Target URL: ${options.url}`);
  resultStore = createResultStore();
  recordResult = resultStore.record;

  if (options.backend === 'lightpanda') {
    if (!options.wsEndpoint) {
      options.wsEndpoint = DEFAULT_WS_ENDPOINT;
    }
  } else if (options.backend === 'headless-chrome') {
    if (options.wsEndpoint) {
      // Make the discard visible instead of silently launching a local browser.
      logInfo(`Ignoring --ws-endpoint (${options.wsEndpoint}): backend headless-chrome launches a local browser.`);
    }
    options.wsEndpoint = '';
  }

  // For local launches, pick a binary: env override first, then a cached headless shell.
  if (!options.wsEndpoint && !options.executablePath) {
    const envPath = process.env.PUPPETEER_EXECUTABLE_PATH;
    if (envPath && fileExists(envPath)) {
      options.executablePath = envPath;
    } else {
      const detected = findHeadlessShellExecutable();
      if (detected) {
        options.executablePath = detected;
      }
    }
  }

  if (options.wsEndpoint) {
    logInfo(`Connecting to CDP: ${options.wsEndpoint}`);
  } else {
    logInfo('Launching browser via Puppeteer...');
    if (options.executablePath) {
      logInfo(`Using executable: ${options.executablePath}`);
    }
  }

  try {
    browser = await connectBrowser(options);
    await newPage(options);

    await gotoWithRetry(options.url, options);
    await waitForDomAndNetwork(options);

    for (let round = 0; round < options.rounds; round += 1) {
      logInfo(`Round ${round + 1}/${options.rounds}: trigger playback`);
      await triggerPlaybackInAllFrames();
      logInfo(`Round ${round + 1}/${options.rounds}: waiting for media`);
      await waitForMedia(options.roundTimeoutMs);
      logInfo(`Round ${round + 1}/${options.rounds}: scanning frames`);
      await scanAllFrames();
      if (resultStore.size() > 0) {
        logInfo(`media found (${resultStore.size()} unique).`);
        break;
      }
      if (round < options.rounds - 1) {
        logInfo(`No media yet; sleeping ${options.roundDelayMs}ms before next round.`);
        await sleep(options.roundDelayMs);
      }
    }

    const entries = resultStore.entries();
    if (options.json) {
      const payload = {
        url: options.url,
        count: entries.length,
        results: entries.map((entry) => ({
          url: entry.url,
          sources: Array.from(entry.sources),
          types: Array.from(entry.types || []),
          firstSeen: new Date(entry.firstSeen).toISOString(),
        })),
      };
      console.log(JSON.stringify(payload, null, 2));
      return;
    }

    if (entries.length === 0) {
      logInfo('No media found.');
      console.log('NO_MEDIA_FOUND');
      return;
    }

    entries.forEach((entry) => {
      console.log(entry.url);
    });
  } finally {
    // Teardown is best-effort: the page or browser may already be gone.
    try {
      if (page) await page.close();
    } catch (_) {
      // Ignored on purpose.
    }
    try {
      if (browser) {
        if (browserIsRemote) {
          await browser.disconnect();
        } else {
          await browser.close();
        }
      }
    } catch (_) {
      // Ignored on purpose.
    }
  }
};
// Script entry point: run the probe. Uses top-level await, so this file must be loaded as an ES module.
await main();
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment