Created
January 8, 2026 20:35
-
-
Save dy/1f0fab792570100c30b369fa77a5d0fe to your computer and use it in GitHub Desktop.
normalize ulebs
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/**
 * Rewrite padded 5-byte unsigned LEB128 encodings in a WebAssembly binary to
 * their minimal form so that two binaries can be compared byte-for-byte.
 *
 * Per the original author's note: WebAssemblyText emits canonical (padded)
 * LEB encodings while watr emits minimal ones.
 *
 * The input is walked as `[optional 8-byte header] (id, size, content)*`.
 * Each section's size is decoded and re-encoded minimally, and the section
 * content is scanned byte-by-byte for padded LEBs. The scan is a pattern
 * match on raw bytes; it does not parse the section structure, so any
 * five bytes that happen to look like a padded LEB are rewritten too.
 *
 * Relies on a `uleb(value)` helper being in scope that returns an iterable
 * of the minimal unsigned LEB128 bytes for `value`.
 *
 * @param {Uint8Array|number[]} bytes - Binary to normalize (must support `slice`).
 * @returns {Uint8Array} A new buffer with padded LEBs replaced and section sizes fixed up.
 */
function normalizeLebs(bytes) {
  const PADDED_LEB_LENGTH = 5;
  const HEADER_LENGTH = 8;
  const MAGIC = [0, 97, 115, 109]; // "\0asm"

  // Append every item of `source` to `target`. A plain loop is used instead
  // of `target.push(...source)` because spreading a large section passes one
  // call argument per byte and can throw a RangeError.
  function appendAll(target, source) {
    for (const item of source) {
      target.push(item);
    }
  }

  // Decode an unsigned LEB128 of any length starting at `start` in `bytes`.
  // Multiplication is used instead of `<<` so values of 2^31 and above stay
  // positive; a negative size would move the section cursor backwards.
  function readLeb(start) {
    let value = 0;
    let scale = 1;
    let pos = start;
    let byte;
    do {
      byte = bytes[pos++];
      value += (byte & 0x7f) * scale;
      scale *= 128;
    } while (byte & 0x80);
    return { value, length: pos - start };
  }

  // If `content[pos .. pos+4]` is a padded 5-byte LEB (four bytes with the
  // continuation bit set followed by a zero byte) return its decoded value,
  // otherwise return -1. At most 28 payload bits are combined, so the
  // 32-bit bitwise operations cannot overflow here.
  function readPaddedLeb(content, pos) {
    if (pos + PADDED_LEB_LENGTH > content.length) {
      return -1;
    }
    if (content[pos + PADDED_LEB_LENGTH - 1] !== 0) {
      return -1;
    }
    let value = 0;
    for (let k = 0; k < PADDED_LEB_LENGTH - 1; k++) {
      const byte = content[pos + k];
      if ((byte & 0x80) === 0) {
        return -1;
      }
      value |= (byte & 0x7f) << (7 * k);
    }
    return value;
  }

  const result = [];
  let i = 0;

  // Copy the magic bytes and version through unchanged when present.
  if (MAGIC.every((expected, k) => bytes[k] === expected)) {
    for (let k = 0; k < HEADER_LENGTH; k++) {
      result.push(bytes[k]);
    }
    i = HEADER_LENGTH;
  }

  // Process sections: id byte, LEB size, then `size` bytes of content.
  while (i < bytes.length) {
    result.push(bytes[i]);
    i++;
    if (i >= bytes.length) break;

    const { value: sectionSize, length: sizeLength } = readLeb(i);
    i += sizeLength;

    // `slice` clamps to the end of the input, so an oversized declared
    // length simply yields whatever bytes remain.
    const sectionContent = bytes.slice(i, i + sectionSize);
    i += sectionSize;

    const normalizedContent = [];
    let j = 0;
    while (j < sectionContent.length) {
      const paddedValue = readPaddedLeb(sectionContent, j);
      if (paddedValue < 0) {
        normalizedContent.push(sectionContent[j]);
        j++;
      } else {
        appendAll(normalizedContent, uleb(paddedValue));
        j += PADDED_LEB_LENGTH;
      }
    }

    // The content may have shrunk, so write the recomputed size.
    appendAll(result, uleb(normalizedContent.length));
    appendAll(result, normalizedContent);
  }

  return new Uint8Array(result);
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment