dy/normalize-uleb

## normalize-uleb

/**
 * Rewrite padded 5-byte LEB128 encodings in a WebAssembly binary to minimal form.
 *
 * WebAssemblyText emits canonical (padded) LEBs while watr emits minimal ones;
 * normalizing lets the two outputs be compared byte by byte.
 *
 * The input is walked as `[id, size, content]` sections, after an optional
 * 8-byte magic + version header that is copied through unchanged. Detection
 * inside a section is a byte-pattern heuristic, not a real decode: every 5-byte
 * run `[a|0x80, b|0x80, 0x80, 0x80, 0x00]` is replaced. Consequently only padded
 * values below 2^14 are recognized, and unrelated bytes that happen to match
 * the pattern are rewritten as well.
 *
 * Relies on `uleb(value)` from the enclosing scope; it is assumed to return the
 * minimal unsigned LEB128 bytes of `value` as an iterable (confirm against its
 * definition).
 *
 * @param {Uint8Array|number[]} bytes - Binary to normalize.
 * @returns {Uint8Array} Normalized copy with every section size recomputed.
 */
function normalizeLebs(bytes) {
  const result = [];
  let i = 0;

  // Append element by element. `target.push(...items)` passes every element as
  // a call argument and throws a RangeError once a section is large enough.
  function appendAll(target, items) {
    for (const item of items) {
      target.push(item);
    }
  }

  // Decode an unsigned LEB128 of any length starting at `start`.
  function readLeb(start) {
    let value = 0;
    let shift = 0;
    let pos = start;
    let byte;
    do {
      // Reading past the end yields `undefined`, which the masks below treat
      // as 0, so a truncated LEB still terminates the loop.
      byte = bytes[pos++];
      // Scale by multiplication rather than `<<`: the shift operator works on
      // signed 32-bit integers, so sizes >= 2^31 would come out negative and
      // move the section cursor backwards.
      value += (byte & 0x7f) * 2 ** shift;
      shift += 7;
    } while (byte & 0x80);
    return { value, length: pos - start };
  }

  // Copy the magic bytes ("\0asm") and the 4 version bytes through unchanged.
  if (bytes[0] === 0 && bytes[1] === 97 && bytes[2] === 115 && bytes[3] === 109) {
    result.push(0, 97, 115, 109, bytes[4], bytes[5], bytes[6], bytes[7]);
    i = 8;
  }

  // Process sections
  while (i < bytes.length) {
    const sectionId = bytes[i];
    result.push(sectionId);
    i++;

    // A trailing id byte with no size after it is emitted as-is.
    if (i >= bytes.length) break;

    // Read section size (any LEB length; it is re-emitted minimally below).
    const sizeInfo = readLeb(i);
    i += sizeInfo.length;

    // Read section content; `slice` clamps if the declared size overruns.
    const sectionContent = bytes.slice(i, i + sizeInfo.value);
    i += sizeInfo.value;

    // Normalize LEBs in section content
    const normalizedContent = [];
    let j = 0;
    while (j < sectionContent.length) {
      // Padded 5-byte form: two payload groups carrying the continuation bit,
      // two pure padding bytes (0x80) and a terminating 0x00. A single-group
      // value is the same pattern with a zero second group.
      const isPadded =
        j + 4 < sectionContent.length &&
        (sectionContent[j] & 0x80) !== 0 &&
        (sectionContent[j + 1] & 0x80) !== 0 &&
        sectionContent[j + 2] === 0x80 &&
        sectionContent[j + 3] === 0x80 &&
        sectionContent[j + 4] === 0x00;

      if (isPadded) {
        const value = (sectionContent[j] & 0x7f) |
                      ((sectionContent[j + 1] & 0x7f) << 7);
        appendAll(normalizedContent, uleb(value));
        j += 5;
      } else {
        normalizedContent.push(sectionContent[j]);
        j++;
      }
    }

    // Write corrected section size and content
    appendAll(result, uleb(normalizedContent.length));
    appendAll(result, normalizedContent);
  }

  return new Uint8Array(result);
}

	/**
	 * Rewrite padded 5-byte LEB128 encodings in a WebAssembly binary to minimal form.
	 *
	 * WebAssemblyText emits canonical (padded) LEBs while watr emits minimal ones;
	 * normalizing lets the two outputs be compared byte by byte.
	 *
	 * The input is walked as `[id, size, content]` sections, after an optional
	 * 8-byte magic + version header that is copied through unchanged. Detection
	 * inside a section is a byte-pattern heuristic, not a real decode: every 5-byte
	 * run `[a|0x80, b|0x80, 0x80, 0x80, 0x00]` is replaced. Consequently only padded
	 * values below 2^14 are recognized, and unrelated bytes that happen to match
	 * the pattern are rewritten as well.
	 *
	 * Relies on `uleb(value)` from the enclosing scope; it is assumed to return the
	 * minimal unsigned LEB128 bytes of `value` as an iterable (confirm against its
	 * definition).
	 *
	 * @param {Uint8Array|number[]} bytes - Binary to normalize.
	 * @returns {Uint8Array} Normalized copy with every section size recomputed.
	 */
	function normalizeLebs(bytes) {
		const result = [];
		let i = 0;

		// Append element by element. `target.push(...items)` passes every element as
		// a call argument and throws a RangeError once a section is large enough.
		function appendAll(target, items) {
			for (const item of items) {
				target.push(item);
			}
		}

		// Decode an unsigned LEB128 of any length starting at `start`.
		function readLeb(start) {
			let value = 0;
			let shift = 0;
			let pos = start;
			let byte;
			do {
				// Reading past the end yields `undefined`, which the masks below treat
				// as 0, so a truncated LEB still terminates the loop.
				byte = bytes[pos++];
				// Scale by multiplication rather than `<<`: the shift operator works on
				// signed 32-bit integers, so sizes >= 2^31 would come out negative and
				// move the section cursor backwards.
				value += (byte & 0x7f) * 2 ** shift;
				shift += 7;
			} while (byte & 0x80);
			return { value, length: pos - start };
		}

		// Copy the magic bytes ("\0asm") and the 4 version bytes through unchanged.
		if (bytes[0] === 0 && bytes[1] === 97 && bytes[2] === 115 && bytes[3] === 109) {
			result.push(0, 97, 115, 109, bytes[4], bytes[5], bytes[6], bytes[7]);
			i = 8;
		}

		// Process sections
		while (i < bytes.length) {
			const sectionId = bytes[i];
			result.push(sectionId);
			i++;

			// A trailing id byte with no size after it is emitted as-is.
			if (i >= bytes.length) break;

			// Read section size (any LEB length; it is re-emitted minimally below).
			const sizeInfo = readLeb(i);
			i += sizeInfo.length;

			// Read section content; `slice` clamps if the declared size overruns.
			const sectionContent = bytes.slice(i, i + sizeInfo.value);
			i += sizeInfo.value;

			// Normalize LEBs in section content
			const normalizedContent = [];
			let j = 0;
			while (j < sectionContent.length) {
				// Padded 5-byte form: two payload groups carrying the continuation bit,
				// two pure padding bytes (0x80) and a terminating 0x00. A single-group
				// value is the same pattern with a zero second group.
				const isPadded =
					j + 4 < sectionContent.length &&
					(sectionContent[j] & 0x80) !== 0 &&
					(sectionContent[j + 1] & 0x80) !== 0 &&
					sectionContent[j + 2] === 0x80 &&
					sectionContent[j + 3] === 0x80 &&
					sectionContent[j + 4] === 0x00;

				if (isPadded) {
					const value = (sectionContent[j] & 0x7f) |
						((sectionContent[j + 1] & 0x7f) << 7);
					appendAll(normalizedContent, uleb(value));
					j += 5;
				} else {
					normalizedContent.push(sectionContent[j]);
					j++;
				}
			}

			// Write corrected section size and content
			appendAll(result, uleb(normalizedContent.length));
			appendAll(result, normalizedContent);
		}

		return new Uint8Array(result);
	}
No results found