banyudu/recover-wemb.ts

## recover-wemb.ts
#!/usr/bin/env npx ts-node

/**
 * WebM File Recovery Tool
 *
 * Recovers broken WebM files that have:
 * - Missing or corrupted EBML headers
 * - Prepended garbage data
 * - Corrupted first byte of EBML magic
 *
 * Usage:
 *   npx ts-node recover-webm.ts <input.webm> [output.webm]
 *   # or with bun:
 *   bun recover-webm.ts <input.webm> [output.webm]
 *
 * @author Recovery Tool
 * @license MIT
 */

import * as fs from "fs";
import * as path from "path";
import { execFileSync, execSync } from "child_process";

// ============================================================================
// Constants - WebM/EBML Element IDs and Signatures
// ============================================================================

// Four-byte signature expected at the very start of a healthy file.
const EBML_MAGIC = Buffer.of(0x1a, 0x45, 0xdf, 0xa3);
// Same signature with a damaged first byte; bytes 2-4 are intact.
const EBML_MAGIC_CORRUPTED_VARIANTS = [
  0xa3, // First byte corrupted to 0xa3
  0x00, // First byte is null
  0xff, // First byte is 0xff
].map((firstByte) => Buffer.of(firstByte, 0x45, 0xdf, 0xa3));

// Element IDs used to locate structure inside the file.
const SEGMENT_ID = Buffer.of(0x18, 0x53, 0x80, 0x67);
const CLUSTER_ID = Buffer.of(0x1f, 0x43, 0xb6, 0x75);
const TRACKS_ID = Buffer.of(0x16, 0x54, 0xae, 0x6b);

// ASCII strings the diagnostics scan for; key order is the reporting order.
const MARKERS = {
  webm: Buffer.from("webm", "ascii"),
  chrome: Buffer.from("Chrome", "ascii"),
  chromeWA: Buffer.from("ChromeWA", "ascii"),
  opusHead: Buffer.from("OpusHead", "ascii"),
  vorbis: Buffer.from("vorbis", "ascii"),
  aOpus: Buffer.from("A_OPUS", "ascii"),
  aVorbis: Buffer.from("A_VORBIS", "ascii"),
};

// ============================================================================
// Types
// ============================================================================

/** Findings produced by scanning one file for WebM/EBML structure. */
interface DiagnosticResult {
  /** True only when the file already starts with the EBML magic bytes. */
  isValid: boolean;
  /** Whether the EBML magic sits at offset 0. */
  hasEbmlHeader: boolean;
  /** Offset of the located (possibly corrupted) EBML header; -1 when none was found. */
  ebmlOffset: number;
  /** True when the located header's first byte is not 0x1a. */
  isEbmlCorrupted: boolean;
  /** The byte found where 0x1a was expected; only set for corrupted headers. */
  corruptedByte?: number;
  /** Every reported string-marker hit, tagged with the marker's name. */
  markers: Array<{ name: string; offset: number }>;
  /** Offsets at which the Cluster element ID was found. */
  clusterOffsets: number[];
  /** Size of the scanned file in bytes. */
  fileSize: number;
  /** Human-readable recommendation derived from the findings. */
  suggestedAction: string;
}

/** Outcome of a recovery attempt, bundled with the diagnostics of the input file. */
interface RecoveryResult {
  /** Whether the attempt is reported as successful. */
  success: boolean;
  /** Path of the file that was written; absent when nothing was written. */
  outputPath?: string;
  /** Human-readable summary; the CLI prints it after "Result:". */
  message: string;
  /** Diagnostics gathered for the broken input file. */
  diagnostics: DiagnosticResult;
}

// ============================================================================
// Utility Functions
// ============================================================================

/**
 * Collect the offsets at which `pattern` appears in `buffer`.
 *
 * The scan resumes one byte after each hit, so overlapping matches are
 * reported too.
 *
 * @param buffer - Data to scan.
 * @param pattern - Byte sequence to look for.
 * @param maxResults - Stop after this many hits (default 100).
 * @returns Matching offsets in ascending order.
 */
function findAllOccurrences(
  buffer: Buffer,
  pattern: Buffer,
  maxResults = 100
): number[] {
  const hits: number[] = [];

  for (
    let cursor = 0;
    cursor < buffer.length && hits.length < maxResults;

  ) {
    const found = buffer.indexOf(pattern, cursor);
    if (found === -1) {
      return hits;
    }
    hits.push(found);
    cursor = found + 1;
  }

  return hits;
}

/**
 * Among several patterns, report the one whose first occurrence comes
 * earliest in `buffer`.
 *
 * @param buffer - Data to scan.
 * @param patterns - Candidate byte sequences.
 * @returns The winning pattern and its offset, or `null` when no pattern
 *   occurs. When two patterns start at the same offset, the one listed first
 *   wins.
 */
function findFirstMatch(
  buffer: Buffer,
  patterns: Buffer[]
): { pattern: Buffer; offset: number } | null {
  return patterns.reduce<{ pattern: Buffer; offset: number } | null>(
    (best, candidate) => {
      const position = buffer.indexOf(candidate);
      const improves =
        position !== -1 && (best === null || position < best.offset);
      return improves ? { pattern: candidate, offset: position } : best;
    },
    null
  );
}

/**
 * Tell whether the buffer begins with the intact EBML magic bytes.
 *
 * @param buffer - File contents to check.
 * @returns `true` when the leading bytes equal `EBML_MAGIC`; `false`
 *   otherwise, including when the buffer is shorter than the magic.
 */
function hasValidEbmlHeader(buffer: Buffer): boolean {
  const leadingBytes = buffer.subarray(0, EBML_MAGIC.length);
  return EBML_MAGIC.equals(leadingBytes);
}

/**
 * Locate the EBML header, tolerating a damaged first byte.
 *
 * Search order:
 * 1. The intact magic anywhere in the buffer.
 * 2. The known corrupted variants; the occurrence closest to the start of the
 *    buffer wins, regardless of the order the variants are listed in.
 * 3. Any byte followed by the last three magic bytes (45 DF A3).
 *
 * @param buffer - File contents to scan.
 * @returns The header offset and whether its first byte needs repair
 *   (`corruptedByte` holds the byte found instead of 0x1a), or `null` when
 *   nothing resembling the header exists.
 */
function findEbmlHeader(buffer: Buffer): {
  offset: number;
  isCorrupted: boolean;
  corruptedByte?: number;
} | null {
  // 1. Intact header, possibly preceded by garbage.
  const perfectMatch = buffer.indexOf(EBML_MAGIC);
  if (perfectMatch !== -1) {
    return { offset: perfectMatch, isCorrupted: false };
  }

  // 2. Known corruptions. Pick the earliest hit rather than the first variant
  //    in the list, so a stray match deep in the data cannot outrank a header
  //    candidate that sits nearer the start.
  const variantHit = findFirstMatch(buffer, EBML_MAGIC_CORRUPTED_VARIANTS);
  if (variantHit !== null) {
    return {
      offset: variantHit.offset,
      isCorrupted: true,
      corruptedByte: buffer[variantHit.offset],
    };
  }

  // 3. Magic tail preceded by an arbitrary byte. Searching from offset 1
  //    skips a tail at offset 0, which has no preceding byte to repair. The
  //    preceding byte cannot be 0x1a here, because that would have been an
  //    intact match in step 1.
  const partialMagic = EBML_MAGIC.subarray(1); // [0x45, 0xdf, 0xa3]
  const tailOffset = buffer.indexOf(partialMagic, 1);
  if (tailOffset !== -1) {
    return {
      offset: tailOffset - 1,
      isCorrupted: true,
      corruptedByte: buffer[tailOffset - 1],
    };
  }

  return null;
}

// ============================================================================
// Diagnostic Functions
// ============================================================================

/**
 * Analyze a potentially broken WebM file.
 *
 * Reads the whole file into memory, checks for the EBML magic at offset 0,
 * and otherwise searches for a displaced or corrupted header, the known
 * string markers (at most 3 hits each) and Cluster elements (at most 10).
 *
 * @param filePath - File to inspect.
 * @returns The findings plus a suggested next step. For a file that already
 *   starts with the magic, markers and clusters are not scanned.
 */
function diagnoseWebmFile(filePath: string): DiagnosticResult {
  const buffer = fs.readFileSync(filePath);
  const result: DiagnosticResult = {
    isValid: false,
    hasEbmlHeader: false,
    ebmlOffset: -1,
    isEbmlCorrupted: false,
    markers: [],
    clusterOffsets: [],
    fileSize: buffer.length,
    suggestedAction: "",
  };

  // Check if file starts with valid EBML header
  result.hasEbmlHeader = hasValidEbmlHeader(buffer);
  result.isValid = result.hasEbmlHeader;

  if (result.hasEbmlHeader) {
    result.ebmlOffset = 0;
    result.suggestedAction = "File appears valid. No recovery needed.";
    return result;
  }

  // Find EBML header (possibly corrupted or at wrong offset)
  const ebmlLocation = findEbmlHeader(buffer);
  if (ebmlLocation) {
    result.ebmlOffset = ebmlLocation.offset;
    result.isEbmlCorrupted = ebmlLocation.isCorrupted;
    result.corruptedByte = ebmlLocation.corruptedByte;
  }

  // Find markers
  for (const [name, marker] of Object.entries(MARKERS)) {
    for (const offset of findAllOccurrences(buffer, marker, 3)) {
      result.markers.push({ name, offset });
    }
  }

  // Find clusters
  result.clusterOffsets = findAllOccurrences(buffer, CLUSTER_ID, 10);

  // Determine suggested action. Offset 0 must be included: a header whose
  // first byte is corrupted right at the start of the file is located at
  // offset 0, and -1 is the only "not found" value.
  if (result.ebmlOffset >= 0) {
    if (result.isEbmlCorrupted) {
      result.suggestedAction = `Found corrupted EBML header at offset ${result.ebmlOffset} (first byte is 0x${result.corruptedByte?.toString(16).padStart(2, "0")} instead of 0x1a). Will extract from this offset and fix the header byte.`;
    } else {
      result.suggestedAction = `Found valid EBML header at offset ${result.ebmlOffset}. Will extract from this offset.`;
    }
  } else if (result.clusterOffsets.length > 0) {
    result.suggestedAction = `No EBML header found, but found ${result.clusterOffsets.length} Cluster elements. May need header transplant from a donor file.`;
  } else {
    result.suggestedAction =
      "Could not identify WebM structure. File may be encrypted or not a WebM file.";
  }

  return result;
}

/**
 * Write a human-readable diagnostics report to stdout.
 *
 * @param diag - Findings to report.
 */
function printDiagnostics(diag: DiagnosticResult): void {
  const rule = "=".repeat(60);
  const hex = (value: number): string => value.toString(16);

  console.log("\n" + rule);
  console.log("WebM File Diagnostics");
  console.log(rule);

  console.log(`\nFile size: ${diag.fileSize.toLocaleString()} bytes`);
  const startsWithMagic = diag.hasEbmlHeader ? "Yes" : "No";
  console.log(`Valid EBML header at start: ${startsWithMagic}`);

  if (diag.ebmlOffset < 0) {
    console.log("EBML header: NOT FOUND");
  } else {
    console.log(
      `EBML header found at offset: 0x${hex(diag.ebmlOffset)} (${diag.ebmlOffset} bytes)`
    );
    if (diag.isEbmlCorrupted) {
      const badByte = diag.corruptedByte?.toString(16).padStart(2, "0");
      console.log(
        `  - Header is CORRUPTED: first byte is 0x${badByte} instead of 0x1a`
      );
    }
  }

  if (diag.markers.length > 0) {
    console.log("\nMarkers found:");
    diag.markers.forEach(({ name, offset }) => {
      console.log(`  - "${name}" at offset 0x${hex(offset)}`);
    });
  }

  const clusterCount = diag.clusterOffsets.length;
  if (clusterCount > 0) {
    console.log(`\nCluster elements found: ${clusterCount}`);
    console.log(`  First cluster at: 0x${hex(diag.clusterOffsets[0])}`);
  }

  console.log(`\nSuggested action: ${diag.suggestedAction}`);
  console.log(rule + "\n");
}

// ============================================================================
// Recovery Functions
// ============================================================================

/**
 * Attempt to recover a broken WebM file.
 *
 * Diagnoses the input, drops everything before the located EBML header,
 * repairs the header's first byte when it is corrupted, writes the result to
 * an intermediate file and re-encodes that file with ffmpeg into
 * `outputPath`.
 *
 * ffmpeg/ffprobe are invoked without a shell, so paths containing quotes,
 * `$` or backticks are passed through verbatim.
 *
 * @param inputPath - Path of the damaged file.
 * @param outputPath - Destination of the recovered file.
 * @returns The outcome plus the diagnostics of the input. `success` is also
 *   true when the input is already valid (nothing is written) and when
 *   re-encoding fails (the raw extracted data is saved to `outputPath`).
 */
function recoverWebmFile(
  inputPath: string,
  outputPath: string
): RecoveryResult {
  console.log(`\nAnalyzing: ${inputPath}`);

  const diagnostics = diagnoseWebmFile(inputPath);
  printDiagnostics(diagnostics);

  if (diagnostics.isValid) {
    return {
      success: true,
      message: "File is already valid, no recovery needed.",
      diagnostics,
    };
  }

  if (diagnostics.ebmlOffset < 0) {
    return {
      success: false,
      message:
        "Could not find EBML header or any recognizable WebM structure. Recovery not possible with this tool.",
      diagnostics,
    };
  }

  const buffer = fs.readFileSync(inputPath);

  // Extract from EBML header offset
  console.log(
    `Extracting data from offset 0x${diagnostics.ebmlOffset.toString(16)}...`
  );
  const extracted = buffer.subarray(diagnostics.ebmlOffset);

  // Fix corrupted EBML header if needed
  if (diagnostics.isEbmlCorrupted) {
    console.log(
      `Fixing corrupted EBML header byte: 0x${diagnostics.corruptedByte?.toString(16).padStart(2, "0")} -> 0x1a`
    );
    extracted[0] = 0x1a;
  }

  // The intermediate file must never coincide with the output (ffmpeg cannot
  // read and write the same file) nor with the input (it would be overwritten
  // and later deleted).
  const webmSuffix = /\.webm$/;
  let tempPath = webmSuffix.test(outputPath)
    ? outputPath.replace(webmSuffix, "_temp.webm")
    : `${outputPath}_temp.webm`;
  if (path.resolve(tempPath) === path.resolve(inputPath)) {
    tempPath = `${tempPath}.tmp`;
  }
  fs.writeFileSync(tempPath, extracted);
  console.log(`Wrote intermediate file: ${tempPath}`);

  // Verify with ffprobe (informational only)
  console.log("\nVerifying extracted file with ffprobe...");
  try {
    const probeResult = execFileSync(
      "ffprobe",
      [
        "-v",
        "error",
        "-show_entries",
        "stream=codec_name,duration",
        "-of",
        "json",
        tempPath,
      ],
      { encoding: "utf-8" }
    );
    const probeData = JSON.parse(probeResult) as { streams?: unknown };
    console.log("Stream info:", JSON.stringify(probeData.streams, null, 2));
  } catch {
    console.log("Warning: ffprobe verification failed, but continuing...");
  }

  // Re-encode to fix timestamps
  console.log("\nRe-encoding to fix timestamps...");
  try {
    execFileSync(
      "ffmpeg",
      [
        "-y",
        "-i",
        tempPath,
        "-af",
        "aresample=async=1",
        "-c:a",
        "libopus",
        "-b:a",
        "64k",
        outputPath,
      ],
      { encoding: "utf-8", stdio: "pipe" }
    );
  } catch {
    // If re-encoding fails, keep the extracted file
    fs.renameSync(tempPath, outputPath);
    console.log(
      "\nWarning: Re-encoding failed, but extracted file may still be usable."
    );
    console.log(`Output saved to: ${outputPath}`);

    return {
      success: true,
      outputPath,
      message:
        "Extracted file saved. Re-encoding failed but file may be partially playable.",
      diagnostics,
    };
  }

  // Clean up temp file
  fs.unlinkSync(tempPath);

  // Get final file info. This runs outside the re-encode try block: the temp
  // file is already gone, so a probe failure must not trigger the rename
  // fallback above.
  let durationText: string | null = null;
  try {
    const finalProbe = execFileSync(
      "ffprobe",
      [
        "-v",
        "error",
        "-show_entries",
        "format=duration",
        "-of",
        "csv=p=0",
        outputPath,
      ],
      { encoding: "utf-8" }
    );
    const duration = parseFloat(finalProbe.trim());
    if (Number.isFinite(duration)) {
      const minutes = Math.floor(duration / 60);
      const seconds = (duration % 60).toFixed(2);
      durationText = `${minutes}m ${seconds}s`;
    }
  } catch {
    // The output exists; only its duration is unknown.
  }

  console.log(`\n✅ Recovery successful!`);
  console.log(`   Output: ${outputPath}`);
  console.log(`   Duration: ${durationText ?? "unknown"}`);

  return {
    success: true,
    outputPath,
    message:
      durationText !== null
        ? `Successfully recovered ${durationText} of audio.`
        : "Successfully recovered audio (duration could not be determined).",
    diagnostics,
  };
}

/**
 * Attempt recovery using a donor file's header.
 *
 * Takes everything before the first Cluster element of the donor file,
 * appends everything from the first Cluster element of the broken file
 * onwards, and re-encodes the combination with ffmpeg into `outputPath`.
 *
 * ffmpeg/ffprobe are invoked without a shell, so paths containing quotes,
 * `$` or backticks are passed through verbatim.
 *
 * @param brokenPath - File whose cluster data should be rescued.
 * @param donorPath - Working file that supplies the header.
 * @param outputPath - Destination of the recovered file.
 * @returns The outcome plus the diagnostics of the broken file. `success` is
 *   false only when either file contains no Cluster element; when re-encoding
 *   fails, the raw combined data is saved to `outputPath` instead.
 */
function recoverWithDonorHeader(
  brokenPath: string,
  donorPath: string,
  outputPath: string
): RecoveryResult {
  console.log(`\nAttempting recovery with donor header...`);
  console.log(`Broken file: ${brokenPath}`);
  console.log(`Donor file: ${donorPath}`);

  const brokenBuffer = fs.readFileSync(brokenPath);
  const donorBuffer = fs.readFileSync(donorPath);
  // Every outcome reports the broken file's diagnostics; compute them once.
  const diagnostics = diagnoseWebmFile(brokenPath);

  // Find first cluster in donor file
  const donorClusterOffset = donorBuffer.indexOf(CLUSTER_ID);
  if (donorClusterOffset === -1) {
    return {
      success: false,
      message: "Donor file does not contain any Cluster elements.",
      diagnostics,
    };
  }

  // Extract header from donor (everything before first cluster)
  const donorHeader = donorBuffer.subarray(0, donorClusterOffset);
  console.log(
    `Extracted ${donorHeader.length} bytes of header from donor file`
  );

  // Find first cluster in broken file
  const brokenClusterOffset = brokenBuffer.indexOf(CLUSTER_ID);
  if (brokenClusterOffset === -1) {
    return {
      success: false,
      message: "Broken file does not contain any Cluster elements to recover.",
      diagnostics,
    };
  }

  // Extract clusters from broken file
  const brokenClusters = brokenBuffer.subarray(brokenClusterOffset);
  console.log(
    `Extracted ${brokenClusters.length} bytes of cluster data from broken file`
  );

  // Combine donor header with broken clusters
  const combined = Buffer.concat([donorHeader, brokenClusters]);

  // The intermediate file must never coincide with the output (ffmpeg cannot
  // read and write the same file) nor with either input (it would be
  // overwritten and later deleted).
  const webmSuffix = /\.webm$/;
  let tempPath = webmSuffix.test(outputPath)
    ? outputPath.replace(webmSuffix, "_temp.webm")
    : `${outputPath}_temp.webm`;
  const inputPaths = [brokenPath, donorPath].map((p) => path.resolve(p));
  if (inputPaths.includes(path.resolve(tempPath))) {
    tempPath = `${tempPath}.tmp`;
  }
  fs.writeFileSync(tempPath, combined);

  // Try to re-encode
  console.log("\nRe-encoding combined file...");
  try {
    execFileSync(
      "ffmpeg",
      [
        "-y",
        "-i",
        tempPath,
        "-af",
        "aresample=async=1",
        "-c:a",
        "libopus",
        "-b:a",
        "64k",
        outputPath,
      ],
      { encoding: "utf-8", stdio: "pipe" }
    );
  } catch {
    // Keep the raw combination when ffmpeg cannot process it.
    fs.renameSync(tempPath, outputPath);
    return {
      success: true,
      outputPath,
      message:
        "Combined file saved. May need manual adjustment for codec compatibility.",
      diagnostics,
    };
  }

  fs.unlinkSync(tempPath);

  // Probe outside the re-encode try block: the temp file is already gone, so
  // a probe failure must not trigger the rename fallback above.
  let durationText: string | null = null;
  try {
    const finalProbe = execFileSync(
      "ffprobe",
      [
        "-v",
        "error",
        "-show_entries",
        "format=duration",
        "-of",
        "csv=p=0",
        outputPath,
      ],
      { encoding: "utf-8" }
    );
    const duration = parseFloat(finalProbe.trim());
    if (Number.isFinite(duration)) {
      durationText = `${Math.floor(duration / 60)}m ${(duration % 60).toFixed(2)}s`;
    }
  } catch {
    // The output exists; only its duration is unknown.
  }

  return {
    success: true,
    outputPath,
    message:
      durationText !== null
        ? `Recovered ${durationText} using donor header.`
        : "Recovered file using donor header (duration could not be determined).",
    diagnostics,
  };
}

// ============================================================================
// CLI Interface
// ============================================================================

/**
 * Print the command-line help text to stdout.
 */
function printUsage(): void {
  // Leading and trailing empty entries reproduce the blank lines that frame
  // the help text.
  const helpLines = [
    "",
    "WebM File Recovery Tool",
    "=======================",
    "",
    "Usage:",
    "  npx ts-node recover-webm.ts <input.webm> [output.webm]",
    "  npx ts-node recover-webm.ts --diagnose <input.webm>",
    "  npx ts-node recover-webm.ts --donor <broken.webm> <donor.webm> <output.webm>",
    "",
    "Options:",
    "  --diagnose    Only analyze the file, don't attempt recovery",
    "  --donor       Use a donor file's header for recovery",
    "",
    "Examples:",
    "  # Recover a broken file",
    "  npx ts-node recover-webm.ts broken.webm recovered.webm",
    "",
    "  # Just diagnose without recovery",
    "  npx ts-node recover-webm.ts --diagnose broken.webm",
    "",
    "  # Use a working file's header to recover",
    "  npx ts-node recover-webm.ts --donor broken.webm working.webm recovered.webm",
    "",
  ];
  console.log(helpLines.join("\n"));
}

/**
 * CLI entry point: parses `process.argv`, dispatches to diagnose, donor or
 * normal recovery mode, and terminates the process.
 *
 * Exit code is 0 on success (or when help is shown) and 1 on a usage error, a
 * missing file, a failed recovery, or a diagnosis of a file that is not valid.
 */
function main(): void {
  const args = process.argv.slice(2);

  if (args.length === 0 || args.includes("--help") || args.includes("-h")) {
    printUsage();
    process.exit(0);
  }

  // Diagnose mode
  if (args[0] === "--diagnose") {
    if (args.length < 2) {
      console.error("Error: Missing input file for diagnosis");
      process.exit(1);
    }
    const inputPath = args[1];
    if (!fs.existsSync(inputPath)) {
      console.error(`Error: File not found: ${inputPath}`);
      process.exit(1);
    }
    const diag = diagnoseWebmFile(inputPath);
    printDiagnostics(diag);
    process.exit(diag.isValid ? 0 : 1);
  }

  // Donor mode
  if (args[0] === "--donor") {
    if (args.length < 4) {
      console.error(
        "Error: Donor mode requires: --donor <broken.webm> <donor.webm> <output.webm>"
      );
      process.exit(1);
    }
    const [, brokenPath, donorPath, outputPath] = args;
    if (!fs.existsSync(brokenPath)) {
      console.error(`Error: Broken file not found: ${brokenPath}`);
      process.exit(1);
    }
    if (!fs.existsSync(donorPath)) {
      console.error(`Error: Donor file not found: ${donorPath}`);
      process.exit(1);
    }
    const result = recoverWithDonorHeader(brokenPath, donorPath, outputPath);
    console.log(`\nResult: ${result.message}`);
    process.exit(result.success ? 0 : 1);
  }

  // Normal recovery mode
  const inputPath = args[0];
  if (!fs.existsSync(inputPath)) {
    console.error(`Error: File not found: ${inputPath}`);
    process.exit(1);
  }

  // Derive the default output name. When the input has no ".webm" suffix a
  // plain replace would return the input path unchanged and recovery would
  // overwrite the source file, so append the suffix in that case.
  const webmSuffix = /\.webm(\.webm)?$/;
  const defaultOutputPath = webmSuffix.test(inputPath)
    ? inputPath.replace(webmSuffix, "_recovered.webm")
    : `${inputPath}_recovered.webm`;
  const outputPath = args[1] || defaultOutputPath;

  const result = recoverWebmFile(inputPath, outputPath);
  console.log(`\nResult: ${result.message}`);
  process.exit(result.success ? 0 : 1);
}

main();
	#!/usr/bin/env npx ts-node

	/**
	* WebM File Recovery Tool
	*
	* Recovers broken WebM files that have:
	* - Missing or corrupted EBML headers
	* - Prepended garbage data
	* - Corrupted first byte of EBML magic
	*
	* Usage:
	* npx ts-node recover-webm.ts <input.webm> [output.webm]
	* # or with bun:
	* bun recover-webm.ts <input.webm> [output.webm]
	*
	* @author Recovery Tool
	* @license MIT
	*/

	import * as fs from "fs";
	import * as path from "path";
	import { execSync } from "child_process";

	// ============================================================================
	// Constants - WebM/EBML Element IDs and Signatures
	// ============================================================================

	// Four-byte signature expected at the very start of a healthy file.
	const EBML_MAGIC = Buffer.of(0x1a, 0x45, 0xdf, 0xa3);
	// Same signature with a damaged first byte; bytes 2-4 are intact.
	const EBML_MAGIC_CORRUPTED_VARIANTS = [
	  0xa3, // First byte corrupted to 0xa3
	  0x00, // First byte is null
	  0xff, // First byte is 0xff
	].map((firstByte) => Buffer.of(firstByte, 0x45, 0xdf, 0xa3));

	// Element IDs used to locate structure inside the file.
	const SEGMENT_ID = Buffer.of(0x18, 0x53, 0x80, 0x67);
	const CLUSTER_ID = Buffer.of(0x1f, 0x43, 0xb6, 0x75);
	const TRACKS_ID = Buffer.of(0x16, 0x54, 0xae, 0x6b);

	// ASCII strings the diagnostics scan for; key order is the reporting order.
	const MARKERS = {
	  webm: Buffer.from("webm", "ascii"),
	  chrome: Buffer.from("Chrome", "ascii"),
	  chromeWA: Buffer.from("ChromeWA", "ascii"),
	  opusHead: Buffer.from("OpusHead", "ascii"),
	  vorbis: Buffer.from("vorbis", "ascii"),
	  aOpus: Buffer.from("A_OPUS", "ascii"),
	  aVorbis: Buffer.from("A_VORBIS", "ascii"),
	};

	// ============================================================================
	// Types
	// ============================================================================

	/** Findings produced by scanning one file for WebM/EBML structure. */
	interface DiagnosticResult {
	  /** True only when the file already starts with the EBML magic bytes. */
	  isValid: boolean;
	  /** Whether the EBML magic sits at offset 0. */
	  hasEbmlHeader: boolean;
	  /** Offset of the located (possibly corrupted) EBML header; -1 when none was found. */
	  ebmlOffset: number;
	  /** True when the located header's first byte is not 0x1a. */
	  isEbmlCorrupted: boolean;
	  /** The byte found where 0x1a was expected; only set for corrupted headers. */
	  corruptedByte?: number;
	  /** Every reported string-marker hit, tagged with the marker's name. */
	  markers: Array<{ name: string; offset: number }>;
	  /** Offsets at which the Cluster element ID was found. */
	  clusterOffsets: number[];
	  /** Size of the scanned file in bytes. */
	  fileSize: number;
	  /** Human-readable recommendation derived from the findings. */
	  suggestedAction: string;
	}

	/** Outcome of a recovery attempt, bundled with the diagnostics of the input file. */
	interface RecoveryResult {
	  /** Whether the attempt is reported as successful. */
	  success: boolean;
	  /** Path of the file that was written; absent when nothing was written. */
	  outputPath?: string;
	  /** Human-readable summary; the CLI prints it after "Result:". */
	  message: string;
	  /** Diagnostics gathered for the broken input file. */
	  diagnostics: DiagnosticResult;
	}

	// ============================================================================
	// Utility Functions
	// ============================================================================

	/**
	 * Collect the offsets at which `pattern` appears in `buffer`.
	 *
	 * The scan resumes one byte after each hit, so overlapping matches are
	 * reported too.
	 *
	 * @param buffer - Data to scan.
	 * @param pattern - Byte sequence to look for.
	 * @param maxResults - Stop after this many hits (default 100).
	 * @returns Matching offsets in ascending order.
	 */
	function findAllOccurrences(
	  buffer: Buffer,
	  pattern: Buffer,
	  maxResults = 100
	): number[] {
	  const hits: number[] = [];

	  for (
	    let cursor = 0;
	    cursor < buffer.length && hits.length < maxResults;

	  ) {
	    const found = buffer.indexOf(pattern, cursor);
	    if (found === -1) {
	      return hits;
	    }
	    hits.push(found);
	    cursor = found + 1;
	  }

	  return hits;
	}

	/**
	 * Among several patterns, report the one whose first occurrence comes
	 * earliest in `buffer`.
	 *
	 * @param buffer - Data to scan.
	 * @param patterns - Candidate byte sequences.
	 * @returns The winning pattern and its offset, or `null` when no pattern
	 *   occurs. When two patterns start at the same offset, the one listed
	 *   first wins.
	 */
	function findFirstMatch(
	  buffer: Buffer,
	  patterns: Buffer[]
	): { pattern: Buffer; offset: number } | null {
	  // The union type and the logical OR below were written as "\|" in this
	  // copy, which is not valid TypeScript; plain "|" restores them.
	  let earliest: { pattern: Buffer; offset: number } | null = null;

	  for (const pattern of patterns) {
	    const idx = buffer.indexOf(pattern);
	    if (idx !== -1 && (earliest === null || idx < earliest.offset)) {
	      earliest = { pattern, offset: idx };
	    }
	  }

	  return earliest;
	}

	/**
	 * Tell whether the buffer begins with the intact EBML magic bytes.
	 *
	 * @param buffer - File contents to check.
	 * @returns `true` when the leading bytes equal `EBML_MAGIC`; `false`
	 *   otherwise, including when the buffer is shorter than the magic.
	 */
	function hasValidEbmlHeader(buffer: Buffer): boolean {
	  const leadingBytes = buffer.subarray(0, EBML_MAGIC.length);
	  return EBML_MAGIC.equals(leadingBytes);
	}

	/**
	 * Locate the EBML header, tolerating a damaged first byte.
	 *
	 * Search order:
	 * 1. The intact magic anywhere in the buffer.
	 * 2. The known corrupted variants; the occurrence closest to the start of
	 *    the buffer wins, regardless of the order the variants are listed in.
	 * 3. Any byte followed by the last three magic bytes (45 DF A3).
	 *
	 * @param buffer - File contents to scan.
	 * @returns The header offset and whether its first byte needs repair
	 *   (`corruptedByte` holds the byte found instead of 0x1a), or `null` when
	 *   nothing resembling the header exists.
	 */
	function findEbmlHeader(buffer: Buffer): {
	  offset: number;
	  isCorrupted: boolean;
	  corruptedByte?: number;
	} | null {
	  // 1. Intact header, possibly preceded by garbage.
	  const perfectMatch = buffer.indexOf(EBML_MAGIC);
	  if (perfectMatch !== -1) {
	    return { offset: perfectMatch, isCorrupted: false };
	  }

	  // 2. Known corruptions. Pick the earliest hit rather than the first
	  //    variant in the list, so a stray match deep in the data cannot
	  //    outrank a header candidate that sits nearer the start.
	  const variantHit = findFirstMatch(buffer, EBML_MAGIC_CORRUPTED_VARIANTS);
	  if (variantHit !== null) {
	    return {
	      offset: variantHit.offset,
	      isCorrupted: true,
	      corruptedByte: buffer[variantHit.offset],
	    };
	  }

	  // 3. Magic tail preceded by an arbitrary byte. Searching from offset 1
	  //    skips a tail at offset 0, which has no preceding byte to repair.
	  //    The preceding byte cannot be 0x1a here, because that would have
	  //    been an intact match in step 1.
	  const partialMagic = EBML_MAGIC.subarray(1); // [0x45, 0xdf, 0xa3]
	  const tailOffset = buffer.indexOf(partialMagic, 1);
	  if (tailOffset !== -1) {
	    return {
	      offset: tailOffset - 1,
	      isCorrupted: true,
	      corruptedByte: buffer[tailOffset - 1],
	    };
	  }

	  return null;
	}

	// ============================================================================
	// Diagnostic Functions
	// ============================================================================

	/**
	 * Analyze a potentially broken WebM file.
	 *
	 * Reads the whole file into memory, checks for the EBML magic at offset 0,
	 * and otherwise searches for a displaced or corrupted header, the known
	 * string markers (at most 3 hits each) and Cluster elements (at most 10).
	 *
	 * @param filePath - File to inspect.
	 * @returns The findings plus a suggested next step. For a file that already
	 *   starts with the magic, markers and clusters are not scanned.
	 */
	function diagnoseWebmFile(filePath: string): DiagnosticResult {
	  const buffer = fs.readFileSync(filePath);
	  const result: DiagnosticResult = {
	    isValid: false,
	    hasEbmlHeader: false,
	    ebmlOffset: -1,
	    isEbmlCorrupted: false,
	    markers: [],
	    clusterOffsets: [],
	    fileSize: buffer.length,
	    suggestedAction: "",
	  };

	  // Check if file starts with valid EBML header
	  result.hasEbmlHeader = hasValidEbmlHeader(buffer);
	  result.isValid = result.hasEbmlHeader;

	  if (result.hasEbmlHeader) {
	    result.ebmlOffset = 0;
	    result.suggestedAction = "File appears valid. No recovery needed.";
	    return result;
	  }

	  // Find EBML header (possibly corrupted or at wrong offset)
	  const ebmlLocation = findEbmlHeader(buffer);
	  if (ebmlLocation) {
	    result.ebmlOffset = ebmlLocation.offset;
	    result.isEbmlCorrupted = ebmlLocation.isCorrupted;
	    result.corruptedByte = ebmlLocation.corruptedByte;
	  }

	  // Find markers
	  for (const [name, marker] of Object.entries(MARKERS)) {
	    for (const offset of findAllOccurrences(buffer, marker, 3)) {
	      result.markers.push({ name, offset });
	    }
	  }

	  // Find clusters
	  result.clusterOffsets = findAllOccurrences(buffer, CLUSTER_ID, 10);

	  // Determine suggested action. Offset 0 must be included: a header whose
	  // first byte is corrupted right at the start of the file is located at
	  // offset 0, and -1 is the only "not found" value.
	  if (result.ebmlOffset >= 0) {
	    if (result.isEbmlCorrupted) {
	      result.suggestedAction = `Found corrupted EBML header at offset ${result.ebmlOffset} (first byte is 0x${result.corruptedByte?.toString(16).padStart(2, "0")} instead of 0x1a). Will extract from this offset and fix the header byte.`;
	    } else {
	      result.suggestedAction = `Found valid EBML header at offset ${result.ebmlOffset}. Will extract from this offset.`;
	    }
	  } else if (result.clusterOffsets.length > 0) {
	    result.suggestedAction = `No EBML header found, but found ${result.clusterOffsets.length} Cluster elements. May need header transplant from a donor file.`;
	  } else {
	    result.suggestedAction =
	      "Could not identify WebM structure. File may be encrypted or not a WebM file.";
	  }

	  return result;
	}

	/**
	 * Write a human-readable diagnostics report to stdout.
	 *
	 * @param diag - Findings to report.
	 */
	function printDiagnostics(diag: DiagnosticResult): void {
	  const rule = "=".repeat(60);
	  const hex = (value: number): string => value.toString(16);

	  console.log("\n" + rule);
	  console.log("WebM File Diagnostics");
	  console.log(rule);

	  console.log(`\nFile size: ${diag.fileSize.toLocaleString()} bytes`);
	  const startsWithMagic = diag.hasEbmlHeader ? "Yes" : "No";
	  console.log(`Valid EBML header at start: ${startsWithMagic}`);

	  if (diag.ebmlOffset < 0) {
	    console.log("EBML header: NOT FOUND");
	  } else {
	    console.log(
	      `EBML header found at offset: 0x${hex(diag.ebmlOffset)} (${diag.ebmlOffset} bytes)`
	    );
	    if (diag.isEbmlCorrupted) {
	      const badByte = diag.corruptedByte?.toString(16).padStart(2, "0");
	      console.log(
	        ` - Header is CORRUPTED: first byte is 0x${badByte} instead of 0x1a`
	      );
	    }
	  }

	  if (diag.markers.length > 0) {
	    console.log("\nMarkers found:");
	    diag.markers.forEach(({ name, offset }) => {
	      console.log(` - "${name}" at offset 0x${hex(offset)}`);
	    });
	  }

	  const clusterCount = diag.clusterOffsets.length;
	  if (clusterCount > 0) {
	    console.log(`\nCluster elements found: ${clusterCount}`);
	    console.log(` First cluster at: 0x${hex(diag.clusterOffsets[0])}`);
	  }

	  console.log(`\nSuggested action: ${diag.suggestedAction}`);
	  console.log(rule + "\n");
	}

	// ============================================================================
	// Recovery Functions
	// ============================================================================

	/**
	 * Attempt to recover a broken WebM file.
	 *
	 * Diagnoses the input, drops everything before the located EBML header,
	 * repairs the header's first byte when it is corrupted, writes the result
	 * to an intermediate file and re-encodes that file with ffmpeg into
	 * `outputPath`.
	 *
	 * ffmpeg/ffprobe are invoked without a shell, so paths containing quotes,
	 * `$` or backticks are passed through verbatim.
	 *
	 * @param inputPath - Path of the damaged file.
	 * @param outputPath - Destination of the recovered file.
	 * @returns The outcome plus the diagnostics of the input. `success` is also
	 *   true when the input is already valid (nothing is written) and when
	 *   re-encoding fails (the raw extracted data is saved to `outputPath`).
	 */
	function recoverWebmFile(
	  inputPath: string,
	  outputPath: string
	): RecoveryResult {
	  console.log(`\nAnalyzing: ${inputPath}`);

	  const diagnostics = diagnoseWebmFile(inputPath);
	  printDiagnostics(diagnostics);

	  if (diagnostics.isValid) {
	    return {
	      success: true,
	      message: "File is already valid, no recovery needed.",
	      diagnostics,
	    };
	  }

	  if (diagnostics.ebmlOffset < 0) {
	    return {
	      success: false,
	      message:
	        "Could not find EBML header or any recognizable WebM structure. Recovery not possible with this tool.",
	      diagnostics,
	    };
	  }

	  const buffer = fs.readFileSync(inputPath);

	  // Extract from EBML header offset
	  console.log(
	    `Extracting data from offset 0x${diagnostics.ebmlOffset.toString(16)}...`
	  );
	  const extracted = buffer.subarray(diagnostics.ebmlOffset);

	  // Fix corrupted EBML header if needed
	  if (diagnostics.isEbmlCorrupted) {
	    console.log(
	      `Fixing corrupted EBML header byte: 0x${diagnostics.corruptedByte?.toString(16).padStart(2, "0")} -> 0x1a`
	    );
	    extracted[0] = 0x1a;
	  }

	  // The intermediate file must never coincide with the output (ffmpeg
	  // cannot read and write the same file) nor with the input (it would be
	  // overwritten and later deleted).
	  const webmSuffix = /\.webm$/;
	  let tempPath = webmSuffix.test(outputPath)
	    ? outputPath.replace(webmSuffix, "_temp.webm")
	    : `${outputPath}_temp.webm`;
	  if (path.resolve(tempPath) === path.resolve(inputPath)) {
	    tempPath = `${tempPath}.tmp`;
	  }
	  fs.writeFileSync(tempPath, extracted);
	  console.log(`Wrote intermediate file: ${tempPath}`);

	  // Verify with ffprobe (informational only)
	  console.log("\nVerifying extracted file with ffprobe...");
	  try {
	    const probeResult = execFileSync(
	      "ffprobe",
	      [
	        "-v",
	        "error",
	        "-show_entries",
	        "stream=codec_name,duration",
	        "-of",
	        "json",
	        tempPath,
	      ],
	      { encoding: "utf-8" }
	    );
	    const probeData = JSON.parse(probeResult) as { streams?: unknown };
	    console.log("Stream info:", JSON.stringify(probeData.streams, null, 2));
	  } catch {
	    console.log("Warning: ffprobe verification failed, but continuing...");
	  }

	  // Re-encode to fix timestamps
	  console.log("\nRe-encoding to fix timestamps...");
	  try {
	    execFileSync(
	      "ffmpeg",
	      [
	        "-y",
	        "-i",
	        tempPath,
	        "-af",
	        "aresample=async=1",
	        "-c:a",
	        "libopus",
	        "-b:a",
	        "64k",
	        outputPath,
	      ],
	      { encoding: "utf-8", stdio: "pipe" }
	    );
	  } catch {
	    // If re-encoding fails, keep the extracted file
	    fs.renameSync(tempPath, outputPath);
	    console.log(
	      "\nWarning: Re-encoding failed, but extracted file may still be usable."
	    );
	    console.log(`Output saved to: ${outputPath}`);

	    return {
	      success: true,
	      outputPath,
	      message:
	        "Extracted file saved. Re-encoding failed but file may be partially playable.",
	      diagnostics,
	    };
	  }

	  // Clean up temp file
	  fs.unlinkSync(tempPath);

	  // Get final file info. This runs outside the re-encode try block: the
	  // temp file is already gone, so a probe failure must not trigger the
	  // rename fallback above.
	  let durationText: string | null = null;
	  try {
	    const finalProbe = execFileSync(
	      "ffprobe",
	      [
	        "-v",
	        "error",
	        "-show_entries",
	        "format=duration",
	        "-of",
	        "csv=p=0",
	        outputPath,
	      ],
	      { encoding: "utf-8" }
	    );
	    const duration = parseFloat(finalProbe.trim());
	    if (Number.isFinite(duration)) {
	      const minutes = Math.floor(duration / 60);
	      const seconds = (duration % 60).toFixed(2);
	      durationText = `${minutes}m ${seconds}s`;
	    }
	  } catch {
	    // The output exists; only its duration is unknown.
	  }

	  console.log(`\n✅ Recovery successful!`);
	  console.log(` Output: ${outputPath}`);
	  console.log(` Duration: ${durationText ?? "unknown"}`);

	  return {
	    success: true,
	    outputPath,
	    message:
	      durationText !== null
	        ? `Successfully recovered ${durationText} of audio.`
	        : "Successfully recovered audio (duration could not be determined).",
	    diagnostics,
	  };
	}

	/**
	 * Attempt recovery using a donor file's header.
	 *
	 * Takes everything before the first Cluster element of the donor file,
	 * appends everything from the first Cluster element of the broken file
	 * onwards, and re-encodes the combination with ffmpeg into `outputPath`.
	 *
	 * ffmpeg/ffprobe are invoked without a shell, so paths containing quotes,
	 * `$` or backticks are passed through verbatim.
	 *
	 * @param brokenPath - File whose cluster data should be rescued.
	 * @param donorPath - Working file that supplies the header.
	 * @param outputPath - Destination of the recovered file.
	 * @returns The outcome plus the diagnostics of the broken file. `success`
	 *   is false only when either file contains no Cluster element; when
	 *   re-encoding fails, the raw combined data is saved to `outputPath`.
	 */
	function recoverWithDonorHeader(
	  brokenPath: string,
	  donorPath: string,
	  outputPath: string
	): RecoveryResult {
	  console.log(`\nAttempting recovery with donor header...`);
	  console.log(`Broken file: ${brokenPath}`);
	  console.log(`Donor file: ${donorPath}`);

	  const brokenBuffer = fs.readFileSync(brokenPath);
	  const donorBuffer = fs.readFileSync(donorPath);
	  // Every outcome reports the broken file's diagnostics; compute them once.
	  const diagnostics = diagnoseWebmFile(brokenPath);

	  // Find first cluster in donor file
	  const donorClusterOffset = donorBuffer.indexOf(CLUSTER_ID);
	  if (donorClusterOffset === -1) {
	    return {
	      success: false,
	      message: "Donor file does not contain any Cluster elements.",
	      diagnostics,
	    };
	  }

	  // Extract header from donor (everything before first cluster)
	  const donorHeader = donorBuffer.subarray(0, donorClusterOffset);
	  console.log(
	    `Extracted ${donorHeader.length} bytes of header from donor file`
	  );

	  // Find first cluster in broken file
	  const brokenClusterOffset = brokenBuffer.indexOf(CLUSTER_ID);
	  if (brokenClusterOffset === -1) {
	    return {
	      success: false,
	      message: "Broken file does not contain any Cluster elements to recover.",
	      diagnostics,
	    };
	  }

	  // Extract clusters from broken file
	  const brokenClusters = brokenBuffer.subarray(brokenClusterOffset);
	  console.log(
	    `Extracted ${brokenClusters.length} bytes of cluster data from broken file`
	  );

	  // Combine donor header with broken clusters
	  const combined = Buffer.concat([donorHeader, brokenClusters]);

	  // The intermediate file must never coincide with the output (ffmpeg
	  // cannot read and write the same file) nor with either input (it would
	  // be overwritten and later deleted).
	  const webmSuffix = /\.webm$/;
	  let tempPath = webmSuffix.test(outputPath)
	    ? outputPath.replace(webmSuffix, "_temp.webm")
	    : `${outputPath}_temp.webm`;
	  const inputPaths = [brokenPath, donorPath].map((p) => path.resolve(p));
	  if (inputPaths.includes(path.resolve(tempPath))) {
	    tempPath = `${tempPath}.tmp`;
	  }
	  fs.writeFileSync(tempPath, combined);

	  // Try to re-encode
	  console.log("\nRe-encoding combined file...");
	  try {
	    execFileSync(
	      "ffmpeg",
	      [
	        "-y",
	        "-i",
	        tempPath,
	        "-af",
	        "aresample=async=1",
	        "-c:a",
	        "libopus",
	        "-b:a",
	        "64k",
	        outputPath,
	      ],
	      { encoding: "utf-8", stdio: "pipe" }
	    );
	  } catch {
	    // Keep the raw combination when ffmpeg cannot process it.
	    fs.renameSync(tempPath, outputPath);
	    return {
	      success: true,
	      outputPath,
	      message:
	        "Combined file saved. May need manual adjustment for codec compatibility.",
	      diagnostics,
	    };
	  }

	  fs.unlinkSync(tempPath);

	  // Probe outside the re-encode try block: the temp file is already gone,
	  // so a probe failure must not trigger the rename fallback above.
	  let durationText: string | null = null;
	  try {
	    const finalProbe = execFileSync(
	      "ffprobe",
	      [
	        "-v",
	        "error",
	        "-show_entries",
	        "format=duration",
	        "-of",
	        "csv=p=0",
	        outputPath,
	      ],
	      { encoding: "utf-8" }
	    );
	    const duration = parseFloat(finalProbe.trim());
	    if (Number.isFinite(duration)) {
	      durationText = `${Math.floor(duration / 60)}m ${(duration % 60).toFixed(2)}s`;
	    }
	  } catch {
	    // The output exists; only its duration is unknown.
	  }

	  return {
	    success: true,
	    outputPath,
	    message:
	      durationText !== null
	        ? `Recovered ${durationText} using donor header.`
	        : "Recovered file using donor header (duration could not be determined).",
	    diagnostics,
	  };
	}

	// ============================================================================
	// CLI Interface
	// ============================================================================

	/**
	 * Print the command-line help text to stdout.
	 *
	 * The text is assembled from an array of lines rather than a multi-line
	 * template literal, so the indentation of this source file cannot leak tab
	 * characters into the output or collapse the option column alignment.
	 */
	function printUsage(): void {
	  // Leading and trailing empty entries produce the blank lines that frame
	  // the help text.
	  const helpLines = [
	    "",
	    "WebM File Recovery Tool",
	    "=======================",
	    "",
	    "Usage:",
	    "  npx ts-node recover-webm.ts <input.webm> [output.webm]",
	    "  npx ts-node recover-webm.ts --diagnose <input.webm>",
	    "  npx ts-node recover-webm.ts --donor <broken.webm> <donor.webm> <output.webm>",
	    "",
	    "Options:",
	    "  --diagnose    Only analyze the file, don't attempt recovery",
	    "  --donor       Use a donor file's header for recovery",
	    "",
	    "Examples:",
	    "  # Recover a broken file",
	    "  npx ts-node recover-webm.ts broken.webm recovered.webm",
	    "",
	    "  # Just diagnose without recovery",
	    "  npx ts-node recover-webm.ts --diagnose broken.webm",
	    "",
	    "  # Use a working file's header to recover",
	    "  npx ts-node recover-webm.ts --donor broken.webm working.webm recovered.webm",
	    "",
	  ];
	  console.log(helpLines.join("\n"));
	}

	/**
	 * CLI entry point: parses `process.argv`, dispatches to diagnose, donor or
	 * normal recovery mode, and terminates the process.
	 *
	 * Exit code is 0 on success (or when help is shown) and 1 on a usage error,
	 * a missing file, a failed recovery, or a diagnosis of a file that is not
	 * valid.
	 */
	function main(): void {
	  const args = process.argv.slice(2);

	  // The logical ORs in this function were written as "\|\|" in this copy,
	  // which is not valid TypeScript; plain "||" restores them.
	  if (args.length === 0 || args.includes("--help") || args.includes("-h")) {
	    printUsage();
	    process.exit(0);
	  }

	  // Diagnose mode
	  if (args[0] === "--diagnose") {
	    if (args.length < 2) {
	      console.error("Error: Missing input file for diagnosis");
	      process.exit(1);
	    }
	    const inputPath = args[1];
	    if (!fs.existsSync(inputPath)) {
	      console.error(`Error: File not found: ${inputPath}`);
	      process.exit(1);
	    }
	    const diag = diagnoseWebmFile(inputPath);
	    printDiagnostics(diag);
	    process.exit(diag.isValid ? 0 : 1);
	  }

	  // Donor mode
	  if (args[0] === "--donor") {
	    if (args.length < 4) {
	      console.error(
	        "Error: Donor mode requires: --donor <broken.webm> <donor.webm> <output.webm>"
	      );
	      process.exit(1);
	    }
	    const [, brokenPath, donorPath, outputPath] = args;
	    if (!fs.existsSync(brokenPath)) {
	      console.error(`Error: Broken file not found: ${brokenPath}`);
	      process.exit(1);
	    }
	    if (!fs.existsSync(donorPath)) {
	      console.error(`Error: Donor file not found: ${donorPath}`);
	      process.exit(1);
	    }
	    const result = recoverWithDonorHeader(brokenPath, donorPath, outputPath);
	    console.log(`\nResult: ${result.message}`);
	    process.exit(result.success ? 0 : 1);
	  }

	  // Normal recovery mode
	  const inputPath = args[0];
	  if (!fs.existsSync(inputPath)) {
	    console.error(`Error: File not found: ${inputPath}`);
	    process.exit(1);
	  }

	  // Derive the default output name. When the input has no ".webm" suffix a
	  // plain replace would return the input path unchanged and recovery would
	  // overwrite the source file, so append the suffix in that case.
	  const webmSuffix = /\.webm(\.webm)?$/;
	  const defaultOutputPath = webmSuffix.test(inputPath)
	    ? inputPath.replace(webmSuffix, "_recovered.webm")
	    : `${inputPath}_recovered.webm`;
	  const outputPath = args[1] || defaultOutputPath;

	  const result = recoverWebmFile(inputPath, outputPath);
	  console.log(`\nResult: ${result.message}`);
	  process.exit(result.success ? 0 : 1);
	}

	main();
No results found