Created
January 17, 2026 03:57
-
-
Save banyudu/c2cd466470a8a2ff1daecd5aed36b3eb to your computer and use it in GitHub Desktop.
Recover broken webm file
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| #!/usr/bin/env npx ts-node | |
| /** | |
| * WebM File Recovery Tool | |
| * | |
| * Recovers broken WebM files that have: | |
| * - Missing or corrupted EBML headers | |
| * - Prepended garbage data | |
| * - Corrupted first byte of EBML magic | |
| * | |
| * Usage: | |
| * npx ts-node recover-webm.ts <input.webm> [output.webm] | |
| * # or with bun: | |
| * bun recover-webm.ts <input.webm> [output.webm] | |
| * | |
| * @author Recovery Tool | |
| * @license MIT | |
| */ | |
import * as fs from "fs";
import * as path from "path";
import { execFileSync, execSync } from "child_process";
| // ============================================================================ | |
| // Constants - WebM/EBML Element IDs and Signatures | |
| // ============================================================================ | |
/** The four bytes expected at the very start of a WebM/EBML file: 1A 45 DF A3. */
const EBML_MAGIC = Buffer.from("1a45dfa3", "hex");

/**
 * Damaged forms of the magic that are searched for explicitly. Only the first
 * byte differs from `EBML_MAGIC`:
 *   0xa3 - first byte corrupted to 0xa3
 *   0x00 - first byte is null
 *   0xff - first byte is 0xff
 */
const EBML_MAGIC_CORRUPTED_VARIANTS = [0xa3, 0x00, 0xff].map((firstByte) =>
  Buffer.from([firstByte, 0x45, 0xdf, 0xa3])
);

/** Byte signatures of the Segment, Cluster and Tracks element IDs. */
const SEGMENT_ID = Buffer.from("18538067", "hex");
const CLUSTER_ID = Buffer.from("1f43b675", "hex");
const TRACKS_ID = Buffer.from("1654ae6b", "hex");

/** ASCII strings whose presence in the file is reported by the diagnostics. */
const MARKERS = {
  webm: Buffer.from("webm", "ascii"),
  chrome: Buffer.from("Chrome", "ascii"),
  chromeWA: Buffer.from("ChromeWA", "ascii"),
  opusHead: Buffer.from("OpusHead", "ascii"),
  vorbis: Buffer.from("vorbis", "ascii"),
  aOpus: Buffer.from("A_OPUS", "ascii"),
  aVorbis: Buffer.from("A_VORBIS", "ascii"),
};
| // ============================================================================ | |
| // Types | |
| // ============================================================================ | |
/** Findings produced by analyzing one (possibly damaged) WebM file. */
interface DiagnosticResult {
  /** Size of the analyzed file in bytes. */
  fileSize: number;
  /** True when the file needs no recovery. */
  isValid: boolean;
  /** True when the file begins with the intact EBML magic. */
  hasEbmlHeader: boolean;
  /** Byte offset of the located EBML header, or -1 when none was found. */
  ebmlOffset: number;
  /** True when the located header has a wrong first byte. */
  isEbmlCorrupted: boolean;
  /** The byte found in place of 0x1a; only set for a corrupted header. */
  corruptedByte?: number;
  /** Marker strings seen in the file, each with the offset where it occurred. */
  markers: Array<{ name: string; offset: number }>;
  /** Offsets at which the Cluster element ID was seen. */
  clusterOffsets: number[];
  /** Human-readable recommendation derived from the findings. */
  suggestedAction: string;
}
/** Outcome of a recovery attempt. */
interface RecoveryResult {
  /** Whether the attempt is reported as successful. */
  success: boolean;
  /** Human-readable summary of what happened. */
  message: string;
  /** Path of the file that was written, when one was produced. */
  outputPath?: string;
  /** Analysis of the broken input file. */
  diagnostics: DiagnosticResult;
}
| // ============================================================================ | |
| // Utility Functions | |
| // ============================================================================ | |
/**
 * Collect the byte offsets at which `pattern` occurs in `buffer`.
 *
 * After each hit the scan resumes one byte later, so overlapping matches are
 * reported.
 *
 * @param buffer Data to scan.
 * @param pattern Byte sequence to look for.
 * @param maxResults Stop after this many offsets have been collected.
 * @returns The offsets in ascending order.
 */
function findAllOccurrences(
  buffer: Buffer,
  pattern: Buffer,
  maxResults = 100
): number[] {
  const hits: number[] = [];
  for (let cursor = 0; cursor < buffer.length && hits.length < maxResults; ) {
    const found = buffer.indexOf(pattern, cursor);
    if (found === -1) {
      break;
    }
    hits.push(found);
    cursor = found + 1;
  }
  return hits;
}
/**
 * Among several patterns, find the one whose first occurrence lies earliest
 * in `buffer`.
 *
 * When two patterns first occur at the same offset, the one listed first wins.
 *
 * @param buffer Data to scan.
 * @param patterns Candidate byte sequences.
 * @returns The winning pattern and its offset, or `null` if none occurs.
 */
function findFirstMatch(
  buffer: Buffer,
  patterns: Buffer[]
): { pattern: Buffer; offset: number } | null {
  return patterns.reduce<{ pattern: Buffer; offset: number } | null>(
    (best, pattern) => {
      const offset = buffer.indexOf(pattern);
      if (offset === -1) {
        return best;
      }
      // Keep the current best unless this pattern starts strictly earlier.
      return best !== null && best.offset <= offset ? best : { pattern, offset };
    },
    null
  );
}
/**
 * Tell whether `buffer` begins with the intact EBML magic bytes.
 *
 * @param buffer File contents to check.
 * @returns `true` only when the leading bytes equal `EBML_MAGIC`; a buffer
 *          shorter than the magic never matches.
 */
function hasValidEbmlHeader(buffer: Buffer): boolean {
  const leadingBytes = buffer.subarray(0, EBML_MAGIC.length);
  return EBML_MAGIC.equals(leadingBytes);
}
/**
 * Locate the EBML header anywhere in `buffer`, tolerating a damaged first byte.
 *
 * Three searches run in order and the first one that succeeds decides the
 * result:
 *   1. the intact magic;
 *   2. each entry of `EBML_MAGIC_CORRUPTED_VARIANTS`, in list order;
 *   3. the last three magic bytes (45 DF A3) preceded by any byte other
 *      than 0x1a.
 *
 * A tail found at offset 0 has no preceding byte and is skipped.
 *
 * @param buffer File contents to scan.
 * @returns The header offset and whether its first byte is wrong (with the
 *          byte actually found), or `null` if nothing matched.
 */
function findEbmlHeader(buffer: Buffer): {
  offset: number;
  isCorrupted: boolean;
  corruptedByte?: number;
} | null {
  // 1. Intact magic.
  const intactAt = buffer.indexOf(EBML_MAGIC);
  if (intactAt !== -1) {
    return { offset: intactAt, isCorrupted: false };
  }

  // 2. Known single-byte corruptions of the magic.
  for (const variant of EBML_MAGIC_CORRUPTED_VARIANTS) {
    const variantAt = buffer.indexOf(variant);
    if (variantAt !== -1) {
      return {
        offset: variantAt,
        isCorrupted: true,
        corruptedByte: buffer[variantAt],
      };
    }
  }

  // 3. Bytes 2-4 of the magic with an arbitrary wrong byte in front.
  const magicTail = EBML_MAGIC.subarray(1); // [0x45, 0xdf, 0xa3]
  for (let from = 0; from < buffer.length - 3; ) {
    const tailAt = buffer.indexOf(magicTail, from);
    if (tailAt === -1) {
      break;
    }
    // 0x1a in front would have been an intact magic, which step 1 rules out.
    if (tailAt > 0 && buffer[tailAt - 1] !== 0x1a) {
      return {
        offset: tailAt - 1,
        isCorrupted: true,
        corruptedByte: buffer[tailAt - 1],
      };
    }
    from = tailAt + 1;
  }

  return null;
}
| // ============================================================================ | |
| // Diagnostic Functions | |
| // ============================================================================ | |
/**
 * Inspect a possibly damaged WebM file and report what structure can be found.
 *
 * The whole file is read into memory. If it already starts with the EBML
 * magic, the result is marked valid and returned immediately without scanning
 * for markers or clusters. Otherwise the file is searched for a displaced or
 * corrupted EBML header, for the strings in `MARKERS` (at most 3 hits each)
 * and for Cluster IDs (at most 10 hits), and `suggestedAction` is chosen from
 * those findings.
 *
 * @param filePath Path of the file to analyze.
 * @returns The collected findings; `ebmlOffset` is -1 when no header was found.
 * @throws Whatever `fs.readFileSync` throws when the file cannot be read.
 */
function diagnoseWebmFile(filePath: string): DiagnosticResult {
  const buffer = fs.readFileSync(filePath);
  const result: DiagnosticResult = {
    isValid: false,
    hasEbmlHeader: false,
    ebmlOffset: -1,
    isEbmlCorrupted: false,
    markers: [],
    clusterOffsets: [],
    fileSize: buffer.length,
    suggestedAction: "",
  };

  // Check if file starts with valid EBML header
  result.hasEbmlHeader = hasValidEbmlHeader(buffer);
  result.isValid = result.hasEbmlHeader;
  if (result.hasEbmlHeader) {
    result.ebmlOffset = 0;
    result.suggestedAction = "File appears valid. No recovery needed.";
    return result;
  }

  // Find EBML header (possibly corrupted or at wrong offset)
  const ebmlLocation = findEbmlHeader(buffer);
  if (ebmlLocation) {
    result.ebmlOffset = ebmlLocation.offset;
    result.isEbmlCorrupted = ebmlLocation.isCorrupted;
    result.corruptedByte = ebmlLocation.corruptedByte;
  }

  // Find markers
  for (const [name, marker] of Object.entries(MARKERS)) {
    for (const offset of findAllOccurrences(buffer, marker, 3)) {
      result.markers.push({ name, offset });
    }
  }

  // Find clusters
  result.clusterOffsets = findAllOccurrences(buffer, CLUSTER_ID, 10);

  // Determine suggested action. Offset 0 must count as "found": a header whose
  // first byte is corrupted sits at offset 0 when no garbage precedes it.
  if (result.ebmlOffset >= 0) {
    if (result.isEbmlCorrupted) {
      result.suggestedAction = `Found corrupted EBML header at offset ${result.ebmlOffset} (first byte is 0x${result.corruptedByte?.toString(16).padStart(2, "0")} instead of 0x1a). Will extract from this offset and fix the header byte.`;
    } else {
      result.suggestedAction = `Found valid EBML header at offset ${result.ebmlOffset}. Will extract from this offset.`;
    }
  } else if (result.clusterOffsets.length > 0) {
    result.suggestedAction = `No EBML header found, but found ${result.clusterOffsets.length} Cluster elements. May need header transplant from a donor file.`;
  } else {
    result.suggestedAction =
      "Could not identify WebM structure. File may be encrypted or not a WebM file.";
  }

  return result;
}
/**
 * Write a human-readable report of `diag` to standard output.
 *
 * The report lists the file size, whether and where an EBML header was found
 * (flagging a corrupted first byte), every recorded marker, the number of
 * clusters with the offset of the first one, and the suggested action.
 *
 * @param diag Findings to print.
 */
function printDiagnostics(diag: DiagnosticResult): void {
  const rule = "=".repeat(60);
  const hex = (value: number): string => value.toString(16);
  const report: string[] = [];

  report.push("\n" + rule, "WebM File Diagnostics", rule);
  report.push(`\nFile size: ${diag.fileSize.toLocaleString()} bytes`);
  report.push(
    `Valid EBML header at start: ${diag.hasEbmlHeader ? "Yes" : "No"}`
  );

  if (diag.ebmlOffset >= 0) {
    report.push(
      `EBML header found at offset: 0x${hex(diag.ebmlOffset)} (${diag.ebmlOffset} bytes)`
    );
    if (diag.isEbmlCorrupted) {
      report.push(
        ` - Header is CORRUPTED: first byte is 0x${diag.corruptedByte?.toString(16).padStart(2, "0")} instead of 0x1a`
      );
    }
  } else {
    report.push("EBML header: NOT FOUND");
  }

  if (diag.markers.length > 0) {
    report.push("\nMarkers found:");
    for (const { name, offset } of diag.markers) {
      report.push(` - "${name}" at offset 0x${hex(offset)}`);
    }
  }

  if (diag.clusterOffsets.length > 0) {
    report.push(`\nCluster elements found: ${diag.clusterOffsets.length}`);
    report.push(` First cluster at: 0x${diag.clusterOffsets[0].toString(16)}`);
  }

  report.push(`\nSuggested action: ${diag.suggestedAction}`);
  report.push(rule + "\n");

  // One console.log call per report line.
  for (const line of report) {
    console.log(line);
  }
}
| // ============================================================================ | |
| // Recovery Functions | |
| // ============================================================================ | |
/**
 * Attempt to recover a broken WebM file.
 *
 * Steps:
 *   1. Diagnose the input and print the report.
 *   2. Return early if the file is already valid (nothing is written) or if
 *      no EBML header could be located (failure).
 *   3. Copy everything from the header offset onward to an intermediate file,
 *      restoring the first magic byte to 0x1a when it was corrupted.
 *   4. Probe the intermediate file with `ffprobe` (informational only).
 *   5. Re-encode it with `ffmpeg` into `outputPath` and delete the
 *      intermediate file. If re-encoding fails, the intermediate file is
 *      moved to `outputPath` instead so the raw extraction is kept.
 *
 * `ffprobe` and `ffmpeg` are started by name without a shell, so paths that
 * contain quotes, spaces or shell metacharacters are passed through verbatim.
 *
 * @param inputPath Path of the broken file.
 * @param outputPath Path the recovered file is written to.
 * @returns Outcome, including the diagnostics of the input file.
 * @throws File-system errors from reading the input or writing the output.
 */
function recoverWebmFile(
  inputPath: string,
  outputPath: string
): RecoveryResult {
  console.log(`\nAnalyzing: ${inputPath}`);
  const diagnostics = diagnoseWebmFile(inputPath);
  printDiagnostics(diagnostics);

  if (diagnostics.isValid) {
    return {
      success: true,
      message: "File is already valid, no recovery needed.",
      diagnostics,
    };
  }

  if (diagnostics.ebmlOffset < 0) {
    return {
      success: false,
      message:
        "Could not find EBML header or any recognizable WebM structure. Recovery not possible with this tool.",
      diagnostics,
    };
  }

  const buffer = fs.readFileSync(inputPath);

  // Extract from EBML header offset
  console.log(
    `Extracting data from offset 0x${diagnostics.ebmlOffset.toString(16)}...`
  );
  const extracted = buffer.subarray(diagnostics.ebmlOffset);

  // Fix corrupted EBML header if needed
  if (diagnostics.isEbmlCorrupted) {
    console.log(
      `Fixing corrupted EBML header byte: 0x${diagnostics.corruptedByte?.toString(16).padStart(2, "0")} -> 0x1a`
    );
    extracted[0] = 0x1a;
  }

  // Write intermediate file. When the output name has no ".webm" suffix a
  // plain replace() would return it unchanged and the intermediate file would
  // collide with the output, so append the suffix in that case.
  const webmSuffix = /\.webm$/;
  const tempPath = webmSuffix.test(outputPath)
    ? outputPath.replace(webmSuffix, "_temp.webm")
    : `${outputPath}_temp.webm`;
  fs.writeFileSync(tempPath, extracted);
  console.log(`Wrote intermediate file: ${tempPath}`);

  // Verify with ffprobe (best effort: a failure here does not stop recovery)
  console.log("\nVerifying extracted file with ffprobe...");
  try {
    const probeResult = execFileSync(
      "ffprobe",
      [
        "-v",
        "error",
        "-show_entries",
        "stream=codec_name,duration",
        "-of",
        "json",
        tempPath,
      ],
      { encoding: "utf-8" }
    );
    const probeData = JSON.parse(probeResult) as { streams?: unknown };
    console.log("Stream info:", JSON.stringify(probeData.streams, null, 2));
  } catch {
    console.log("Warning: ffprobe verification failed, but continuing...");
  }

  // Re-encode to fix timestamps
  console.log("\nRe-encoding to fix timestamps...");
  try {
    execFileSync(
      "ffmpeg",
      [
        "-y",
        "-i",
        tempPath,
        "-af",
        "aresample=async=1",
        "-c:a",
        "libopus",
        "-b:a",
        "64k",
        outputPath,
      ],
      { encoding: "utf-8", stdio: "pipe" }
    );
  } catch {
    // If re-encoding fails, keep the extracted file
    fs.renameSync(tempPath, outputPath);
    console.log(
      "\nWarning: Re-encoding failed, but extracted file may still be usable."
    );
    console.log(`Output saved to: ${outputPath}`);
    return {
      success: true,
      outputPath,
      message:
        "Extracted file saved. Re-encoding failed but file may be partially playable.",
      diagnostics,
    };
  }

  // Clean up temp file. From here on the re-encoded output exists, so neither
  // a failed cleanup nor a failed duration probe may discard it.
  try {
    fs.unlinkSync(tempPath);
  } catch {
    console.log(`Warning: could not remove intermediate file: ${tempPath}`);
  }

  // Get final file info
  let durationLabel: string | null = null;
  try {
    const finalProbe = execFileSync(
      "ffprobe",
      [
        "-v",
        "error",
        "-show_entries",
        "format=duration",
        "-of",
        "csv=p=0",
        outputPath,
      ],
      { encoding: "utf-8" }
    );
    const duration = parseFloat(finalProbe.trim());
    if (Number.isFinite(duration)) {
      const minutes = Math.floor(duration / 60);
      const seconds = (duration % 60).toFixed(2);
      durationLabel = `${minutes}m ${seconds}s`;
    }
  } catch {
    // Duration is informational only; leave it unknown.
  }

  console.log(`\n✅ Recovery successful!`);
  console.log(` Output: ${outputPath}`);
  console.log(` Duration: ${durationLabel ?? "unknown"}`);
  return {
    success: true,
    outputPath,
    message:
      durationLabel !== null
        ? `Successfully recovered ${durationLabel} of audio.`
        : "Successfully recovered audio (duration could not be determined).",
    diagnostics,
  };
}
/**
 * Attempt recovery by grafting a working file's header onto the broken file's
 * cluster data.
 *
 * Everything before the first Cluster ID in the donor file is used as the
 * header; everything from the first Cluster ID onward in the broken file is
 * used as the payload. The concatenation is written to an intermediate file
 * and re-encoded with `ffmpeg` into `outputPath`. If re-encoding fails, the
 * intermediate file is moved to `outputPath` instead.
 *
 * `ffmpeg` and `ffprobe` are started by name without a shell, so paths that
 * contain quotes, spaces or shell metacharacters are passed through verbatim.
 *
 * @param brokenPath Path of the file whose clusters should be rescued.
 * @param donorPath Path of a working file that supplies the header.
 * @param outputPath Path the recovered file is written to.
 * @returns Outcome, including diagnostics of the broken file. The result is a
 *          failure when either file contains no Cluster ID.
 * @throws File-system errors from reading the inputs or writing the output.
 */
function recoverWithDonorHeader(
  brokenPath: string,
  donorPath: string,
  outputPath: string
): RecoveryResult {
  console.log(`\nAttempting recovery with donor header...`);
  console.log(`Broken file: ${brokenPath}`);
  console.log(`Donor file: ${donorPath}`);

  const brokenBuffer = fs.readFileSync(brokenPath);
  const donorBuffer = fs.readFileSync(donorPath);

  // Diagnose the broken file once, before anything is written, instead of
  // re-reading it on every return path.
  const diagnostics = diagnoseWebmFile(brokenPath);

  // Find first cluster in donor file
  const donorClusterOffset = donorBuffer.indexOf(CLUSTER_ID);
  if (donorClusterOffset === -1) {
    return {
      success: false,
      message: "Donor file does not contain any Cluster elements.",
      diagnostics,
    };
  }

  // Extract header from donor (everything before first cluster)
  const donorHeader = donorBuffer.subarray(0, donorClusterOffset);
  console.log(
    `Extracted ${donorHeader.length} bytes of header from donor file`
  );

  // Find first cluster in broken file
  const brokenClusterOffset = brokenBuffer.indexOf(CLUSTER_ID);
  if (brokenClusterOffset === -1) {
    return {
      success: false,
      message: "Broken file does not contain any Cluster elements to recover.",
      diagnostics,
    };
  }

  // Extract clusters from broken file
  const brokenClusters = brokenBuffer.subarray(brokenClusterOffset);
  console.log(
    `Extracted ${brokenClusters.length} bytes of cluster data from broken file`
  );

  // Combine donor header with broken clusters
  const combined = Buffer.concat([donorHeader, brokenClusters]);

  // Write combined file. When the output name has no ".webm" suffix a plain
  // replace() would return it unchanged and the intermediate file would
  // collide with the output, so append the suffix in that case.
  const webmSuffix = /\.webm$/;
  const tempPath = webmSuffix.test(outputPath)
    ? outputPath.replace(webmSuffix, "_temp.webm")
    : `${outputPath}_temp.webm`;
  fs.writeFileSync(tempPath, combined);

  // Try to re-encode
  console.log("\nRe-encoding combined file...");
  try {
    execFileSync(
      "ffmpeg",
      [
        "-y",
        "-i",
        tempPath,
        "-af",
        "aresample=async=1",
        "-c:a",
        "libopus",
        "-b:a",
        "64k",
        outputPath,
      ],
      { encoding: "utf-8", stdio: "pipe" }
    );
  } catch {
    // Re-encoding failed: keep the raw combined file as the output.
    fs.renameSync(tempPath, outputPath);
    return {
      success: true,
      outputPath,
      message:
        "Combined file saved. May need manual adjustment for codec compatibility.",
      diagnostics,
    };
  }

  // From here on the re-encoded output exists, so neither a failed cleanup
  // nor a failed duration probe may discard it.
  try {
    fs.unlinkSync(tempPath);
  } catch {
    console.log(`Warning: could not remove intermediate file: ${tempPath}`);
  }

  let durationLabel: string | null = null;
  try {
    const finalProbe = execFileSync(
      "ffprobe",
      [
        "-v",
        "error",
        "-show_entries",
        "format=duration",
        "-of",
        "csv=p=0",
        outputPath,
      ],
      { encoding: "utf-8" }
    );
    const duration = parseFloat(finalProbe.trim());
    if (Number.isFinite(duration)) {
      durationLabel = `${Math.floor(duration / 60)}m ${(duration % 60).toFixed(2)}s`;
    }
  } catch {
    // Duration is informational only; leave it unknown.
  }

  return {
    success: true,
    outputPath,
    message:
      durationLabel !== null
        ? `Recovered ${durationLabel} using donor header.`
        : "Recovered audio using donor header (duration could not be determined).",
    diagnostics,
  };
}
| // ============================================================================ | |
| // CLI Interface | |
| // ============================================================================ | |
/**
 * Print the command-line help text to standard output as a single message
 * that begins and ends with a newline.
 */
function printUsage(): void {
  const helpLines = [
    "WebM File Recovery Tool",
    "=======================",
    "Usage:",
    "npx ts-node recover-webm.ts <input.webm> [output.webm]",
    "npx ts-node recover-webm.ts --diagnose <input.webm>",
    "npx ts-node recover-webm.ts --donor <broken.webm> <donor.webm> <output.webm>",
    "Options:",
    "--diagnose Only analyze the file, don't attempt recovery",
    "--donor Use a donor file's header for recovery",
    "Examples:",
    "# Recover a broken file",
    "npx ts-node recover-webm.ts broken.webm recovered.webm",
    "# Just diagnose without recovery",
    "npx ts-node recover-webm.ts --diagnose broken.webm",
    "# Use a working file's header to recover",
    "npx ts-node recover-webm.ts --donor broken.webm working.webm recovered.webm",
  ];
  // The empty first and last entries reproduce the leading and trailing newline.
  console.log(["", ...helpLines, ""].join("\n"));
}
/**
 * Command-line entry point. Parses `process.argv`, runs one of three modes and
 * terminates the process with an exit code:
 *
 *   - no arguments, `--help` or `-h`: print usage, exit 0;
 *   - `--diagnose <input>`: print diagnostics, exit 0 if the file is valid,
 *     1 otherwise;
 *   - `--donor <broken> <donor> <output>`: donor-header recovery;
 *   - `<input> [output]`: normal recovery.
 *
 * Recovery modes exit 0 on success and 1 on failure. Missing arguments or
 * missing input files are reported on stderr and exit 1.
 */
function main(): void {
  const args = process.argv.slice(2);
  if (args.length === 0 || args.includes("--help") || args.includes("-h")) {
    printUsage();
    process.exit(0);
  }

  // Diagnose mode
  if (args[0] === "--diagnose") {
    if (args.length < 2) {
      console.error("Error: Missing input file for diagnosis");
      process.exit(1);
    }
    const inputPath = args[1];
    if (!fs.existsSync(inputPath)) {
      console.error(`Error: File not found: ${inputPath}`);
      process.exit(1);
    }
    const diag = diagnoseWebmFile(inputPath);
    printDiagnostics(diag);
    process.exit(diag.isValid ? 0 : 1);
  }

  // Donor mode
  if (args[0] === "--donor") {
    if (args.length < 4) {
      console.error(
        "Error: Donor mode requires: --donor <broken.webm> <donor.webm> <output.webm>"
      );
      process.exit(1);
    }
    const [, brokenPath, donorPath, outputPath] = args;
    if (!fs.existsSync(brokenPath)) {
      console.error(`Error: Broken file not found: ${brokenPath}`);
      process.exit(1);
    }
    if (!fs.existsSync(donorPath)) {
      console.error(`Error: Donor file not found: ${donorPath}`);
      process.exit(1);
    }
    const result = recoverWithDonorHeader(brokenPath, donorPath, outputPath);
    console.log(`\nResult: ${result.message}`);
    process.exit(result.success ? 0 : 1);
  }

  // Normal recovery mode
  const inputPath = args[0];
  if (!fs.existsSync(inputPath)) {
    console.error(`Error: File not found: ${inputPath}`);
    process.exit(1);
  }

  // Derive the default output name. If the input has no ".webm" suffix a plain
  // replace() would return the input path itself and recovery would overwrite
  // the file being recovered, so append the suffix instead.
  const webmSuffix = /\.webm(\.webm)?$/;
  const defaultOutputPath = webmSuffix.test(inputPath)
    ? inputPath.replace(webmSuffix, "_recovered.webm")
    : `${inputPath}_recovered.webm`;
  // `||` (not `??`) on purpose: an empty-string argument also falls back.
  const outputPath = args[1] || defaultOutputPath;

  const result = recoverWebmFile(inputPath, outputPath);
  console.log(`\nResult: ${result.message}`);
  process.exit(result.success ? 0 : 1);
}

main();
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment