Skip to content

Instantly share code, notes, and snippets.

@banyudu
Created January 17, 2026 03:57
Show Gist options
  • Select an option

  • Save banyudu/c2cd466470a8a2ff1daecd5aed36b3eb to your computer and use it in GitHub Desktop.

Select an option

Save banyudu/c2cd466470a8a2ff1daecd5aed36b3eb to your computer and use it in GitHub Desktop.
Recover broken webm file
#!/usr/bin/env npx ts-node
/**
* WebM File Recovery Tool
*
* Recovers broken WebM files that have:
* - Missing or corrupted EBML headers
* - Prepended garbage data
* - Corrupted first byte of EBML magic
*
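* Usage:
* npx ts-node recover-webm.ts <input.webm> [output.webm]
* npx ts-node recover-webm.ts --diagnose <input.webm>
* npx ts-node recover-webm.ts --donor <broken.webm> <donor.webm> <output.webm>
* # or with bun:
* bun recover-webm.ts <input.webm> [output.webm]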
*
* @author Recovery Tool
* @license MIT
*/
import * as fs from "fs";
import * as path from "path";
import { execFileSync, execSync } from "child_process";
// ============================================================================
// Constants - WebM/EBML Element IDs and Signatures
// ============================================================================
const EBML_MAGIC = Buffer.from([0x1a, 0x45, 0xdf, 0xa3]);
const EBML_MAGIC_CORRUPTED_VARIANTS = [
Buffer.from([0xa3, 0x45, 0xdf, 0xa3]), // First byte corrupted to 0xa3
Buffer.from([0x00, 0x45, 0xdf, 0xa3]), // First byte is null
Buffer.from([0xff, 0x45, 0xdf, 0xa3]), // First byte is 0xff
];
const SEGMENT_ID = Buffer.from([0x18, 0x53, 0x80, 0x67]);
const CLUSTER_ID = Buffer.from([0x1f, 0x43, 0xb6, 0x75]);
const TRACKS_ID = Buffer.from([0x16, 0x54, 0xae, 0x6b]);
// String markers to search for
const MARKERS = {
webm: Buffer.from("webm", "ascii"),
chrome: Buffer.from("Chrome", "ascii"),
chromeWA: Buffer.from("ChromeWA", "ascii"),
opusHead: Buffer.from("OpusHead", "ascii"),
vorbis: Buffer.from("vorbis", "ascii"),
aOpus: Buffer.from("A_OPUS", "ascii"),
aVorbis: Buffer.from("A_VORBIS", "ascii"),
};
// ============================================================================
// Types
// ============================================================================
interface DiagnosticResult {
isValid: boolean;
hasEbmlHeader: boolean;
ebmlOffset: number;
isEbmlCorrupted: boolean;
corruptedByte?: number;
markers: { name: string; offset: number }[];
clusterOffsets: number[];
fileSize: number;
suggestedAction: string;
}
interface RecoveryResult {
success: boolean;
outputPath?: string;
message: string;
diagnostics: DiagnosticResult;
}
// ============================================================================
// Utility Functions
// ============================================================================
/**
 * Collect the byte offsets at which `pattern` occurs in `buffer`.
 *
 * Overlapping matches are reported, because the scan resumes one byte after
 * each hit rather than after the whole match.
 *
 * @param buffer - Data to scan.
 * @param pattern - Byte sequence to look for.
 * @param maxResults - Upper bound on the number of offsets returned.
 * @returns Ascending match offsets, at most `maxResults` of them.
 */
function findAllOccurrences(
  buffer: Buffer,
  pattern: Buffer,
  maxResults = 100
): number[] {
  const hits: number[] = [];
  for (let cursor = 0; cursor < buffer.length && hits.length < maxResults; ) {
    const found = buffer.indexOf(pattern, cursor);
    if (found === -1) {
      return hits;
    }
    hits.push(found);
    cursor = found + 1;
  }
  return hits;
}
/**
 * Among several patterns, find the one whose first occurrence lies earliest
 * in the buffer.
 *
 * When two patterns first occur at the same offset, the one listed first in
 * `patterns` wins.
 *
 * @param buffer - Data to scan.
 * @param patterns - Candidate byte sequences.
 * @returns The winning pattern and its offset, or `null` if none occurs.
 */
function findFirstMatch(
  buffer: Buffer,
  patterns: Buffer[]
): { pattern: Buffer; offset: number } | null {
  return patterns.reduce<{ pattern: Buffer; offset: number } | null>(
    (best, candidate) => {
      const at = buffer.indexOf(candidate);
      if (at === -1) {
        return best;
      }
      // Keep the current best unless this candidate starts strictly earlier.
      if (best !== null && best.offset <= at) {
        return best;
      }
      return { pattern: candidate, offset: at };
    },
    null
  );
}
/**
 * Report whether the buffer begins with the intact EBML magic bytes.
 *
 * Buffers shorter than the magic never match.
 *
 * @param buffer - File contents to check.
 * @returns `true` only when the leading bytes equal `EBML_MAGIC`.
 */
function hasValidEbmlHeader(buffer: Buffer): boolean {
  const leadingBytes = buffer.subarray(0, EBML_MAGIC.length);
  return EBML_MAGIC.equals(leadingBytes);
}
/**
 * Locate the EBML header anywhere in the buffer, tolerating a damaged first
 * byte.
 *
 * The search runs in three passes and stops at the first pass that yields a
 * hit:
 *  1. the intact four-byte magic;
 *  2. each known corrupted variant, tried in list order;
 *  3. the last three magic bytes (45 DF A3) preceded by any byte other than
 *     0x1a.
 *
 * @param buffer - File contents to scan.
 * @returns The header offset, whether its first byte is damaged, and that
 *   damaged byte when applicable; `null` when nothing matches.
 */
function findEbmlHeader(buffer: Buffer): {
  offset: number;
  isCorrupted: boolean;
  corruptedByte?: number;
} | null {
  // Pass 1: intact magic.
  const exactAt = buffer.indexOf(EBML_MAGIC);
  if (exactAt !== -1) {
    return { offset: exactAt, isCorrupted: false };
  }

  // Pass 2: known single-byte corruptions of the first magic byte.
  for (const damaged of EBML_MAGIC_CORRUPTED_VARIANTS) {
    const damagedAt = buffer.indexOf(damaged);
    if (damagedAt !== -1) {
      return {
        offset: damagedAt,
        isCorrupted: true,
        corruptedByte: buffer[damagedAt],
      };
    }
  }

  // Pass 3: the magic's tail with an arbitrary byte in front of it.
  const magicTail = EBML_MAGIC.subarray(1); // [0x45, 0xdf, 0xa3]
  const scanLimit = buffer.length - 3;
  for (let from = 0; from < scanLimit; ) {
    const tailAt = buffer.indexOf(magicTail, from);
    if (tailAt === -1) {
      return null;
    }
    // A tail at offset 0 has no preceding byte that could be repaired, and
    // a preceding 0x1a would have been caught by pass 1.
    if (tailAt > 0 && buffer[tailAt - 1] !== 0x1a) {
      return {
        offset: tailAt - 1,
        isCorrupted: true,
        corruptedByte: buffer[tailAt - 1],
      };
    }
    from = tailAt + 1;
  }
  return null;
}
// ============================================================================
// Diagnostic Functions
// ============================================================================
/**
 * Analyze a potentially broken WebM file.
 *
 * Reads the whole file into memory and checks whether it begins with the
 * intact EBML magic. If it does not, the file is searched for a displaced or
 * damaged header, for the known string markers (at most 3 hits per marker)
 * and for Cluster element IDs (at most 10 hits).
 *
 * @param filePath - Path of the file to inspect.
 * @returns The findings together with a human-readable suggested action.
 * @throws Whatever `fs.readFileSync` throws when the file cannot be read.
 */
function diagnoseWebmFile(filePath: string): DiagnosticResult {
  const buffer = fs.readFileSync(filePath);
  const result: DiagnosticResult = {
    isValid: false,
    hasEbmlHeader: false,
    ebmlOffset: -1,
    isEbmlCorrupted: false,
    markers: [],
    clusterOffsets: [],
    fileSize: buffer.length,
    suggestedAction: "",
  };

  // Check if file starts with valid EBML header
  result.hasEbmlHeader = hasValidEbmlHeader(buffer);
  result.isValid = result.hasEbmlHeader;
  if (result.hasEbmlHeader) {
    result.ebmlOffset = 0;
    result.suggestedAction = "File appears valid. No recovery needed.";
    return result;
  }

  // Find EBML header (possibly corrupted or at wrong offset)
  const ebmlLocation = findEbmlHeader(buffer);
  if (ebmlLocation) {
    result.ebmlOffset = ebmlLocation.offset;
    result.isEbmlCorrupted = ebmlLocation.isCorrupted;
    result.corruptedByte = ebmlLocation.corruptedByte;
  }

  // Find markers
  for (const [name, marker] of Object.entries(MARKERS)) {
    for (const offset of findAllOccurrences(buffer, marker, 3)) {
      result.markers.push({ name, offset });
    }
  }

  // Find clusters
  result.clusterOffsets = findAllOccurrences(buffer, CLUSTER_ID, 10);

  // Determine suggested action.
  // Offset 0 is a legitimate hit: a header whose first byte is damaged but
  // which has no garbage in front of it is found at offset 0. Hence `>= 0`;
  // -1 is the only "not found" value.
  if (result.ebmlOffset >= 0) {
    if (result.isEbmlCorrupted) {
      result.suggestedAction = `Found corrupted EBML header at offset ${result.ebmlOffset} (first byte is 0x${result.corruptedByte?.toString(16).padStart(2, "0")} instead of 0x1a). Will extract from this offset and fix the header byte.`;
    } else {
      result.suggestedAction = `Found valid EBML header at offset ${result.ebmlOffset}. Will extract from this offset.`;
    }
  } else if (result.clusterOffsets.length > 0) {
    result.suggestedAction = `No EBML header found, but found ${result.clusterOffsets.length} Cluster elements. May need header transplant from a donor file.`;
  } else {
    result.suggestedAction =
      "Could not identify WebM structure. File may be encrypted or not a WebM file.";
  }
  return result;
}
/**
 * Write a human-readable report of the diagnostics to stdout.
 *
 * @param diag - Findings to print.
 */
function printDiagnostics(diag: DiagnosticResult): void {
  const rule = "=".repeat(60);
  const asHex = (value: number): string => value.toString(16);

  console.log("\n" + rule);
  console.log("WebM File Diagnostics");
  console.log(rule);
  console.log(`\nFile size: ${diag.fileSize.toLocaleString()} bytes`);

  const startsValid = diag.hasEbmlHeader ? "Yes" : "No";
  console.log(`Valid EBML header at start: ${startsValid}`);

  if (diag.ebmlOffset >= 0) {
    console.log(
      `EBML header found at offset: 0x${asHex(diag.ebmlOffset)} (${diag.ebmlOffset} bytes)`
    );
    if (diag.isEbmlCorrupted) {
      const badByte = diag.corruptedByte?.toString(16).padStart(2, "0");
      console.log(
        ` - Header is CORRUPTED: first byte is 0x${badByte} instead of 0x1a`
      );
    }
  } else {
    console.log("EBML header: NOT FOUND");
  }

  if (diag.markers.length > 0) {
    console.log("\nMarkers found:");
    diag.markers.forEach(({ name, offset }) => {
      console.log(` - "${name}" at offset 0x${asHex(offset)}`);
    });
  }

  const clusterCount = diag.clusterOffsets.length;
  if (clusterCount > 0) {
    console.log(`\nCluster elements found: ${clusterCount}`);
    console.log(` First cluster at: 0x${asHex(diag.clusterOffsets[0])}`);
  }

  console.log(`\nSuggested action: ${diag.suggestedAction}`);
  console.log(rule + "\n");
}
// ============================================================================
// Recovery Functions
// ============================================================================
/**
 * Attempt to recover a broken WebM file.
 *
 * Steps:
 *  1. Diagnose the input and print the report.
 *  2. Copy everything from the located EBML header onwards into an
 *     intermediate file, repairing the first magic byte if it is damaged.
 *  3. Probe the intermediate file with `ffprobe` (informational only).
 *  4. Re-encode it with `ffmpeg` into `outputPath`. If that fails, the
 *     intermediate file itself is moved to `outputPath` as a best effort.
 *
 * `ffmpeg` and `ffprobe` are looked up by name, so they must be on PATH.
 * Both are invoked with argument arrays (no shell), so paths containing
 * quotes, `$`, backticks or spaces are passed through verbatim.
 *
 * @param inputPath - Broken file to read.
 * @param outputPath - Where the recovered file is written.
 * @returns Outcome of the attempt, including the diagnostics. `success` is
 *   also `true` when only the raw extraction (without re-encoding) was saved.
 * @throws Whatever the `fs` calls throw when files cannot be read or written.
 */
function recoverWebmFile(
  inputPath: string,
  outputPath: string
): RecoveryResult {
  console.log(`\nAnalyzing: ${inputPath}`);
  const diagnostics = diagnoseWebmFile(inputPath);
  printDiagnostics(diagnostics);

  if (diagnostics.isValid) {
    return {
      success: true,
      message: "File is already valid, no recovery needed.",
      diagnostics,
    };
  }
  if (diagnostics.ebmlOffset < 0) {
    return {
      success: false,
      message:
        "Could not find EBML header or any recognizable WebM structure. Recovery not possible with this tool.",
      diagnostics,
    };
  }

  const buffer = fs.readFileSync(inputPath);

  // Extract from EBML header offset
  console.log(
    `Extracting data from offset 0x${diagnostics.ebmlOffset.toString(16)}...`
  );
  const extracted = buffer.subarray(diagnostics.ebmlOffset);

  // Fix corrupted EBML header if needed
  if (diagnostics.isEbmlCorrupted) {
    console.log(
      `Fixing corrupted EBML header byte: 0x${diagnostics.corruptedByte?.toString(16).padStart(2, "0")} -> 0x1a`
    );
    extracted[0] = 0x1a;
  }

  // Pick an intermediate path that is guaranteed to differ from the output.
  // A plain suffix replacement leaves the path unchanged when the output does
  // not end in ".webm", which would make ffmpeg read and overwrite the same
  // file and then have the result deleted during cleanup.
  const fallbackTempPath = `${outputPath}_temp.webm`;
  let tempPath = /\.webm$/.test(outputPath)
    ? outputPath.replace(/\.webm$/, "_temp.webm")
    : fallbackTempPath;
  // Never let the intermediate file land on top of the input file.
  if (path.resolve(tempPath) === path.resolve(inputPath)) {
    tempPath = fallbackTempPath;
  }
  fs.writeFileSync(tempPath, extracted);
  console.log(`Wrote intermediate file: ${tempPath}`);

  // Verify with ffprobe (informational; failure does not abort recovery)
  console.log("\nVerifying extracted file with ffprobe...");
  try {
    const probeResult = execFileSync(
      "ffprobe",
      [
        "-v",
        "error",
        "-show_entries",
        "stream=codec_name,duration",
        "-of",
        "json",
        tempPath,
      ],
      { encoding: "utf-8" }
    );
    const probeData = JSON.parse(probeResult) as { streams?: unknown };
    console.log("Stream info:", JSON.stringify(probeData.streams, null, 2));
  } catch {
    console.log("Warning: ffprobe verification failed, but continuing...");
  }

  // Re-encode to fix timestamps
  console.log("\nRe-encoding to fix timestamps...");
  try {
    execFileSync(
      "ffmpeg",
      [
        "-y",
        "-i",
        tempPath,
        "-af",
        "aresample=async=1",
        "-c:a",
        "libopus",
        "-b:a",
        "64k",
        outputPath,
      ],
      // Capture ffmpeg's output instead of echoing it to the terminal.
      { encoding: "utf-8", stdio: "pipe" }
    );
  } catch {
    // If re-encoding fails, keep the extracted file. Only the ffmpeg call is
    // covered by this handler, so the intermediate file still exists here.
    fs.renameSync(tempPath, outputPath);
    console.log(
      "\nWarning: Re-encoding failed, but extracted file may still be usable."
    );
    console.log(`Output saved to: ${outputPath}`);
    return {
      success: true,
      outputPath,
      message:
        "Extracted file saved. Re-encoding failed but file may be partially playable.",
      diagnostics,
    };
  }

  // Clean up temp file; a leftover intermediate file must not fail a
  // recovery that already produced its output.
  try {
    fs.unlinkSync(tempPath);
  } catch {
    console.log(`Warning: could not remove intermediate file: ${tempPath}`);
  }

  // Get final file info. The duration is cosmetic, so a probe failure or an
  // unparsable value (e.g. "N/A") is reported rather than treated as an error.
  let durationText: string | null = null;
  try {
    const finalProbe = execFileSync(
      "ffprobe",
      [
        "-v",
        "error",
        "-show_entries",
        "format=duration",
        "-of",
        "csv=p=0",
        outputPath,
      ],
      { encoding: "utf-8" }
    );
    const duration = parseFloat(finalProbe.trim());
    if (Number.isFinite(duration)) {
      const minutes = Math.floor(duration / 60);
      const seconds = (duration % 60).toFixed(2);
      durationText = `${minutes}m ${seconds}s`;
    }
  } catch {
    durationText = null;
  }

  console.log(`\n✅ Recovery successful!`);
  console.log(` Output: ${outputPath}`);
  console.log(` Duration: ${durationText ?? "unknown"}`);
  return {
    success: true,
    outputPath,
    message:
      durationText !== null
        ? `Successfully recovered ${durationText} of audio.`
        : "Successfully recovered audio (duration could not be determined).",
    diagnostics,
  };
}
/**
 * Attempt recovery using a donor file's header.
 *
 * Everything in the donor file that precedes its first Cluster ID is used as
 * the header, and everything in the broken file from its first Cluster ID
 * onwards is appended to it. The combined data is then re-encoded with
 * `ffmpeg`; if that fails, the raw combination is saved to `outputPath`
 * instead.
 *
 * `ffmpeg` and `ffprobe` are looked up by name, so they must be on PATH.
 * Both are invoked with argument arrays (no shell), so paths containing
 * quotes, `$`, backticks or spaces are passed through verbatim.
 *
 * @param brokenPath - File whose clusters should be rescued.
 * @param donorPath - Working file that supplies the header.
 * @param outputPath - Where the recovered file is written.
 * @returns Outcome of the attempt plus diagnostics of the broken file.
 *   `success` is also `true` when only the un-re-encoded combination was saved.
 * @throws Whatever the `fs` calls throw when files cannot be read or written.
 */
function recoverWithDonorHeader(
  brokenPath: string,
  donorPath: string,
  outputPath: string
): RecoveryResult {
  console.log(`\nAttempting recovery with donor header...`);
  console.log(`Broken file: ${brokenPath}`);
  console.log(`Donor file: ${donorPath}`);
  const brokenBuffer = fs.readFileSync(brokenPath);
  const donorBuffer = fs.readFileSync(donorPath);
  // Diagnose once, before anything is written, so every return path reports
  // the state of the broken input itself.
  const diagnostics = diagnoseWebmFile(brokenPath);

  // Find first cluster in donor file
  const donorClusterOffset = donorBuffer.indexOf(CLUSTER_ID);
  if (donorClusterOffset === -1) {
    return {
      success: false,
      message: "Donor file does not contain any Cluster elements.",
      diagnostics,
    };
  }

  // Extract header from donor (everything before first cluster)
  const donorHeader = donorBuffer.subarray(0, donorClusterOffset);
  console.log(
    `Extracted ${donorHeader.length} bytes of header from donor file`
  );

  // Find first cluster in broken file
  const brokenClusterOffset = brokenBuffer.indexOf(CLUSTER_ID);
  if (brokenClusterOffset === -1) {
    return {
      success: false,
      message: "Broken file does not contain any Cluster elements to recover.",
      diagnostics,
    };
  }

  // Extract clusters from broken file
  const brokenClusters = brokenBuffer.subarray(brokenClusterOffset);
  console.log(
    `Extracted ${brokenClusters.length} bytes of cluster data from broken file`
  );

  // Combine donor header with broken clusters
  const combined = Buffer.concat([donorHeader, brokenClusters]);

  // Pick an intermediate path that is guaranteed to differ from the output.
  // A plain suffix replacement leaves the path unchanged when the output does
  // not end in ".webm", which would make ffmpeg read and overwrite the same
  // file and then have the result deleted during cleanup.
  const fallbackTempPath = `${outputPath}_temp.webm`;
  let tempPath = /\.webm$/.test(outputPath)
    ? outputPath.replace(/\.webm$/, "_temp.webm")
    : fallbackTempPath;
  // Never let the intermediate file land on top of either input file.
  const resolvedTemp = path.resolve(tempPath);
  if (
    resolvedTemp === path.resolve(brokenPath) ||
    resolvedTemp === path.resolve(donorPath)
  ) {
    tempPath = fallbackTempPath;
  }
  fs.writeFileSync(tempPath, combined);

  // Try to re-encode
  console.log("\nRe-encoding combined file...");
  try {
    execFileSync(
      "ffmpeg",
      [
        "-y",
        "-i",
        tempPath,
        "-af",
        "aresample=async=1",
        "-c:a",
        "libopus",
        "-b:a",
        "64k",
        outputPath,
      ],
      // Capture ffmpeg's output instead of echoing it to the terminal.
      { encoding: "utf-8", stdio: "pipe" }
    );
  } catch {
    // Only the ffmpeg call is covered by this handler, so the intermediate
    // file still exists and can be kept as the best-effort result.
    fs.renameSync(tempPath, outputPath);
    return {
      success: true,
      outputPath,
      message:
        "Combined file saved. May need manual adjustment for codec compatibility.",
      diagnostics,
    };
  }

  // A leftover intermediate file must not fail a recovery that already
  // produced its output.
  try {
    fs.unlinkSync(tempPath);
  } catch {
    console.log(`Warning: could not remove intermediate file: ${tempPath}`);
  }

  // The duration is cosmetic, so a probe failure or an unparsable value
  // (e.g. "N/A") is reported rather than treated as an error.
  let durationText: string | null = null;
  try {
    const finalProbe = execFileSync(
      "ffprobe",
      [
        "-v",
        "error",
        "-show_entries",
        "format=duration",
        "-of",
        "csv=p=0",
        outputPath,
      ],
      { encoding: "utf-8" }
    );
    const duration = parseFloat(finalProbe.trim());
    if (Number.isFinite(duration)) {
      durationText = `${Math.floor(duration / 60)}m ${(duration % 60).toFixed(2)}s`;
    }
  } catch {
    durationText = null;
  }

  return {
    success: true,
    outputPath,
    message:
      durationText !== null
        ? `Recovered ${durationText} using donor header.`
        : "Recovered audio using donor header (duration could not be determined).",
    diagnostics,
  };
}
// ============================================================================
// CLI Interface
// ============================================================================
/**
 * Print the command-line help text to stdout with a single `console.log`
 * call.
 */
function printUsage(): void {
  const helpLines = [
    "WebM File Recovery Tool",
    "=======================",
    "Usage:",
    "npx ts-node recover-webm.ts <input.webm> [output.webm]",
    "npx ts-node recover-webm.ts --diagnose <input.webm>",
    "npx ts-node recover-webm.ts --donor <broken.webm> <donor.webm> <output.webm>",
    "Options:",
    "--diagnose Only analyze the file, don't attempt recovery",
    "--donor Use a donor file's header for recovery",
    "Examples:",
    "# Recover a broken file",
    "npx ts-node recover-webm.ts broken.webm recovered.webm",
    "# Just diagnose without recovery",
    "npx ts-node recover-webm.ts --diagnose broken.webm",
    "# Use a working file's header to recover",
    "npx ts-node recover-webm.ts --donor broken.webm working.webm recovered.webm",
  ];
  // The text is framed by one newline in front and one at the end.
  console.log("\n" + helpLines.join("\n") + "\n");
}
/**
 * Command-line entry point.
 *
 * Dispatches on the first argument:
 *  - no arguments, `--help` or `-h`: print usage and exit 0;
 *  - `--diagnose <input>`: print diagnostics, exit 0 if the file is valid,
 *    1 otherwise;
 *  - `--donor <broken> <donor> <output>`: donor-header recovery;
 *  - `<input> [output]`: normal recovery. Without an explicit output the
 *    name is derived from the input and always differs from it.
 *
 * Always terminates the process via `process.exit`.
 */
function main(): void {
  const args = process.argv.slice(2);
  if (args.length === 0 || args.includes("--help") || args.includes("-h")) {
    printUsage();
    process.exit(0);
  }

  // Two spellings of the same location must be treated as the same file.
  const isSameFile = (a: string, b: string): boolean =>
    path.resolve(a) === path.resolve(b);

  // Diagnose mode
  if (args[0] === "--diagnose") {
    if (args.length < 2) {
      console.error("Error: Missing input file for diagnosis");
      process.exit(1);
    }
    const inputPath = args[1];
    if (!fs.existsSync(inputPath)) {
      console.error(`Error: File not found: ${inputPath}`);
      process.exit(1);
    }
    const diag = diagnoseWebmFile(inputPath);
    printDiagnostics(diag);
    process.exit(diag.isValid ? 0 : 1);
  }

  // Donor mode
  if (args[0] === "--donor") {
    if (args.length < 4) {
      console.error(
        "Error: Donor mode requires: --donor <broken.webm> <donor.webm> <output.webm>"
      );
      process.exit(1);
    }
    const [, brokenPath, donorPath, outputPath] = args;
    if (!fs.existsSync(brokenPath)) {
      console.error(`Error: Broken file not found: ${brokenPath}`);
      process.exit(1);
    }
    if (!fs.existsSync(donorPath)) {
      console.error(`Error: Donor file not found: ${donorPath}`);
      process.exit(1);
    }
    // Writing the result over one of the inputs would destroy it.
    if (isSameFile(outputPath, brokenPath) || isSameFile(outputPath, donorPath)) {
      console.error(
        `Error: Output path must differ from the broken and donor files: ${outputPath}`
      );
      process.exit(1);
    }
    const result = recoverWithDonorHeader(brokenPath, donorPath, outputPath);
    console.log(`\nResult: ${result.message}`);
    process.exit(result.success ? 0 : 1);
  }

  // Normal recovery mode
  const inputPath = args[0];
  if (!fs.existsSync(inputPath)) {
    console.error(`Error: File not found: ${inputPath}`);
    process.exit(1);
  }

  // Derive the default output name. The suffix replacement only changes
  // names ending in ".webm"; for anything else (e.g. "clip.mkv", "clip.WEBM",
  // "clip") it would return the input path unchanged and recovery would
  // overwrite the source file, so fall back to swapping the extension.
  const suffixReplaced = inputPath.replace(
    /\.webm(\.webm)?$/,
    "_recovered.webm"
  );
  const parsedInput = path.parse(inputPath);
  const defaultOutputPath =
    suffixReplaced !== inputPath
      ? suffixReplaced
      : path.join(parsedInput.dir, `${parsedInput.name}_recovered.webm`);
  // `||` (not `??`) on purpose: an empty-string argument also selects the default.
  const outputPath = args[1] || defaultOutputPath;

  // An explicit output equal to the input would destroy the source file.
  if (isSameFile(outputPath, inputPath)) {
    console.error(
      `Error: Output path must differ from the input file: ${outputPath}`
    );
    process.exit(1);
  }

  const result = recoverWebmFile(inputPath, outputPath);
  console.log(`\nResult: ${result.message}`);
  process.exit(result.success ? 0 : 1);
}
main();
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment