Last active
February 26, 2026 07:09
-
-
Save eiz/5a85c2465b1df44a97544719bed5b2c8 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| // asm.s — self-hosting aarch64 assembler | |
| // | |
| // reads an aarch64 assembly source file (GAS-compatible subset), | |
| // emits a static PIE ELF binary directly. no linker required. | |
| // | |
| // usage: asm <input.s> <output> | |
| // | |
| // this file was created solely by Claude (Opus 4.6 and Sonnet 4.6) and is | |
| // in the public domain (or CC0 1.0, if you prefer). | |
| // | |
| // to bootstrap with GNU tools: gcc -o asm0 -nostdlib asm.s && ./asm0 asm.s asm | |
| // | |
| // current binary size: 5543 bytes | |
| // | |
| // ── supported instructions ──────────────────────────────────────────────── | |
| // | |
| // arithmetic/logic: | |
| // add Rd, Rn, #imm12 | Rm [, lsl #N] sub (same forms) | |
| // adds Rd, Rn, #imm12 | Rm [, lsl #N] subs (same forms) | |
| // cmp Rn, #imm12 | Rm [, lsl #N] cmn (same forms) | |
| // and Rd, Rn, #bitmask | Rm [, lsl #N] orr, eor (same forms) | |
| // ands Rd, Rn, #bitmask | Rm [, lsl #N] (flag-setting AND) | |
| // tst Rn, #bitmask | Rm (ANDS alias, Rd=XZR) | |
| // bic Rd, Rn, Rm | |
| // neg Rd, Rm mvn Rd, Rm | |
| // mul Rd, Rn, Rm msub Rd, Rn, Rm, Ra | |
| // madd Rd, Rn, Rm, Ra (Rd = Ra + Rn*Rm) | |
| // udiv Rd, Rn, Rm sdiv Rd, Rn, Rm | |
| // nop | |
| // | |
| // moves: | |
| // mov Rd, Rm | #imm (MOVZ/MOVN-encodable) | SP | |
| // movz Rd, #imm16 [, lsl #N] movn, movk (same forms) | |
| // | |
| // shifts: | |
| // lsl Rd, Rn, Rm | #N lsr, asr (same forms) | |
| // ror Rd, Rn, Rm | |
| // | |
| // bitfield: | |
| // ubfm Rd, Rn, #immr, #imms sbfm, bfm (same forms) | |
| // ubfx Rd, Rn, #lsb, #width sbfx (same form) | |
| // ubfiz Rd, Rn, #lsb, #width sbfiz, bfi (same form) | |
| // bfxil Rd, Rn, #lsb, #width | |
| // sxtb Rd, Wn sxth Rd, Wn sxtw Rd, Wn | |
| // uxtb Wd, Wn uxth Wd, Wn | |
| // | |
| // bit manipulation: | |
| // clz Rd, Rn rbit Rd, Rn | |
| // | |
| // branches: | |
| // b label bl label br Xn blr Xn ret | |
| // b.cc label (eq ne hs lo mi pl vs vc hi ls ge lt gt le al cs cc) | |
| // cbz Rt, label cbnz Rt, label | |
| // tbz Rt, #bit, label tbnz Rt, #bit, label | |
| // | |
| // conditional: | |
| // csel Rd, Rn, Rm, cc csinc Rd, Rn, Rm, cc | |
| // cset Rd, cc | |
| // | |
| // address: | |
| // adr Rd, expr adrp Rd, symbol | |
| // | |
| // load/store (single): | |
| // ldr Rt, [Rn {, #imm | :lo12:sym}] str (same forms) | |
| // ldr Rt, [Rn, Rm {, lsl #N}] str (same forms) | |
| // ldr Rt, [Rn, #simm9]! str (pre-index) | |
| // ldr Rt, [Rn], #simm9 str (post-index) | |
| // ldr Rt, label (PC-relative literal) | |
| // ldrb (same addressing modes) strb | |
| // ldrh (same addressing modes) strh | |
| // ldrsb Rt, [Rn {, ...}] ldrsh, ldrsw | |
| // | |
| // load/store (pair): | |
| // ldp Rt1, Rt2, [Rn {, #imm}] stp (same forms) | |
| // ldp Rt1, Rt2, [Rn, #imm]! stp (pre-index) | |
| // ldp Rt1, Rt2, [Rn], #imm stp (post-index) | |
| // | |
| // system: | |
| // svc #imm16 | |
| // | |
| // ── registers ───────────────────────────────────────────────────────────── | |
| // x0-x30, w0-w30, xzr, wzr, sp (no fp/lr aliases) | |
| // | |
| // ── directives ──────────────────────────────────────────────────────────── | |
| // .text .bss .section .rodata .global name .equ name, expr | |
| // .word expr .ascii "str" .asciz "str" | |
| // .align N .skip N | |
| // | |
| // ── expressions ─────────────────────────────────────────────────────────── | |
| // operators: | & + - * << >> unary: ~ - grouping: ( ) | |
| // atoms: 123 0xFF 'A' '\n' . label :lo12:expr | |
| // labels: name: N: (numeric 0-9, ref as Nf/Nb) | |
| // comments: // | |
| // | |
| // ── output ──────────────────────────────────────────────────────────────── | |
| // ELF64 static PIE, single LOAD segment (RWX), no section headers. | |
| // .text is dictionary-compressed; decompressor stub runs at entry. | |
| // | |
| // ── syscall numbers ─────────────────────────────────────────────────────── | |
// Loaded into x8 immediately before `svc #0` (see _start, write2, exit_common).
| .equ SYS_exit, 93 | |
| .equ SYS_read, 63 | |
| .equ SYS_write, 64 | |
| .equ SYS_openat, 56 | |
| .equ SYS_close, 57 | |
| .equ SYS_fchmod, 52 | |
| // ── file constants ──────────────────────────────────────────────────────── | |
| .equ AT_FDCWD, -100 | |
| .equ O_RDONLY, 0 | |
| .equ O_WRONLY_CREAT_TRUNC, 577 // O_WRONLY|O_CREAT|O_TRUNC = 1|64|512 | |
| .equ STDERR, 2 | |
| // ── ELF constants ───────────────────────────────────────────────────────── | |
// CODE_START doubles as the file offset of the first byte after the headers
// and as the e_entry value written by _start.
| .equ ELF_HEADER_SIZE, 64 | |
| .equ PHDR_SIZE, 56 | |
| .equ CODE_START, 120 // ELF_HEADER_SIZE + PHDR_SIZE | |
| // ── compression constants ───────────────────────────────────────────────── | |
| // STUB_SIZE and STUB_DATA_* are computed from labels after _decomp_stub_end | |
| .equ FULL_DICT_ENTRIES, 126 | |
| .equ HALF_DICT_ENTRIES, 128 | |
| .equ FULL_DICT_SIZE, 504 // 126 * 4 | |
| .equ HALF_DICT_SIZE, 256 // 128 * 2 | |
| // ── section IDs ─────────────────────────────────────────────────────────── | |
// Because the IDs are byte offsets, `ldr x, [x28, <section id>]` reads that
// section's ST_*_POS slot directly.
| .equ SEC_TEXT, 0 // pre-multiplied by 8 for direct state block indexing | |
| .equ SEC_RODATA, 8 | |
| .equ SEC_BSS, 16 | |
| // ── state block offsets (all u64) ───────────────────────────────────────── | |
// All offsets are relative to x28, which _start points at `state`.
| .equ ST_TEXT_POS, 0 // current offset within .text | |
| .equ ST_RODATA_POS, 8 // current offset within .rodata | |
| .equ ST_BSS_POS, 16 // current offset within .bss | |
| .equ ST_CUR_SEC, 24 // current section (SEC_TEXT/RODATA/BSS) | |
| .equ ST_TEXT_BASE, 32 // virtual address of .text start | |
| .equ ST_RODATA_BASE, 40 // virtual address of .rodata start | |
| .equ ST_BSS_BASE, 48 // virtual address of .bss start | |
| .equ ST_PASS, 56 // current pass (1 or 2) | |
| .equ ST_LINE_NUM, 64 // current source line number | |
| .equ ST_INPUT_LEN, 72 // input file length in bytes | |
| .equ ST_FILE_SIZE, 80 // total output file size | |
| .equ ST_MEM_SIZE, 88 // total memory size (file + bss) | |
// (offset 96 has no named field in this list)
| .equ ST_INPUT_NAME, 104 // pointer to input filename string | |
| .equ ST_OUTPUT_NAME, 112 // pointer to output filename string | |
// ST_SIZE = bytes reserved for the state block (last field ends at 112 + 8).
| .equ ST_SIZE, 120 | |
| // ── symbol table entry layout (32 bytes) ────────────────────────────────── | |
// The table is a fixed-size hash table probed linearly (see sym_lookup).
| // name_ptr u64 @ 0 pointer to name in input buffer (0 = empty slot) | |
| // name_len u32 @ 8 length of name | |
| // flags u64 @ 16 SYMF_* bits | |
| // value u64 @ 24 address or .equ value | |
| .equ SYM_ENT_SIZE, 32 | |
| .equ SYM_NAME_PTR, 0 | |
| .equ SYM_NAME_LEN, 8 | |
| .equ SYM_FLAGS, 16 | |
| .equ SYM_VALUE, 24 | |
| .equ SYM_TBL_SLOTS, 1024 // must be power of 2 | |
| // ── symbol flags ────────────────────────────────────────────────────────── | |
| .equ SYMF_DEFINED, 1 | |
| .equ SYMF_GLOBAL, 2 | |
| .equ SYMF_EQU, 4 | |
| .equ SYMF_SEC_SHIFT, 4 // section stored in bits 5:4 of flags | |
| // ── buffer sizes ────────────────────────────────────────────────────────── | |
// The three 1 MB sizes must stay equal: code steps between the buffers with a
// single 1 MB stride held in x29.
| .equ INPUT_BUF_SIZE, 1048576 // 1 MB | |
| .equ TEXT_BUF_SIZE, 1048576 // 1 MB | |
| .equ RODATA_BUF_SIZE, 1048576 // 1 MB | |
| .equ SYM_TBL_BYTES, 32768 // SYM_TBL_SLOTS * SYM_ENT_SIZE | |
| // ── BSS offsets from x28 (state block pointer) ──────────────────────────── | |
// These mirror the order of the .skip reservations in .bss, so code can reach
// the arrays as x28 + offset instead of loading each label separately.
// NUMLAB_DIGITS is referenced here before its .equ a few lines further down.
| .equ NUMLAB_CNTS_OFF, ST_SIZE // 120 | |
| .equ NUMLAB_CURS_OFF, NUMLAB_CNTS_OFF + NUMLAB_DIGITS * 8 // 200 | |
| .equ INPUT_BUF_OFF, NUMLAB_CURS_OFF + NUMLAB_DIGITS * 8 // 280 | |
| // ── numeric labels ──────────────────────────────────────────────────────── | |
| .equ NUMLAB_MAX_DEFS, 128 // max definitions per digit | |
| .equ NUMLAB_DIGITS, 10 // digits 0-9 | |
| // ══════════════════════════════════════════════════════════════════════════ | |
| // BSS | |
| // ══════════════════════════════════════════════════════════════════════════ | |
// The ORDER of these reservations is load-bearing. Code does not address the
// buffers by label; it uses x28 + INPUT_BUF_OFF for input_buf, x27 for
// text_buf, x27 + 1 MB for rodata_buf and x27 + 2 MB for sym_table.
| .bss | |
| .align 4 | |
| state: .skip ST_SIZE | |
| numlab_cnts: .skip NUMLAB_DIGITS * 8 | |
| numlab_curs: .skip NUMLAB_DIGITS * 8 | |
| input_buf: .skip INPUT_BUF_SIZE | |
| text_buf: .skip TEXT_BUF_SIZE | |
| rodata_buf: .skip RODATA_BUF_SIZE | |
| sym_table: .skip SYM_TBL_BYTES | |
| // numeric label storage: 10 digits × 128 defs × 8 bytes | |
| numlab_defs: .skip NUMLAB_DIGITS * NUMLAB_MAX_DEFS * 8 | |
| // ══════════════════════════════════════════════════════════════════════════ | |
| // Read-only data | |
| // ══════════════════════════════════════════════════════════════════════════ | |
| .section .rodata | |
// Diagnostic messages. Each is NUL-terminated and already ends in '\n';
// die_msg measures the length with strlen_x1 before writing to stderr.
| msg_usage: .asciz "usage: asm <input.s> <output>\n" | |
| msg_open: .asciz "cannot open input file\n" | |
| msg_create: .asciz "cannot create output file\n" | |
| msg_syntax: .asciz "syntax error\n" | |
| msg_undef: .asciz "undefined symbol\n" | |
| msg_badins: .asciz "unknown instruction\n" | |
| msg_badimm: .asciz "invalid immediate\n" | |
| // condition code XOR lookup: cond_xor_tbl[(c0^c1) & 0x1F] = cond code (31=invalid) | |
// 32 one-byte entries, packed four per .word with the lowest index in the
// least-significant byte. Example: "eq" → 'e'^'q' = 0x14 → entry 20 = 0x00;
// "ne" → 'n'^'e' = 0x0B → entry 11 = 0x01.
| cond_xor_tbl: | |
| .word 0x030A0803 | |
| .word 0x1F1F0604 | |
| .word 0x011F0D1F | |
| .word 0x1F1F0E1F | |
| .word 0x0C1F1F02 | |
| .word 0x1F1F0700 | |
| .word 0x021F1F0B | |
| .word 0x091F1F05 | |
| // operator table for expression parser: 2-byte entries (char, packed+0x20), sentinel=\0 | |
| // packed = (prec<<4)|opcode: | →0x10 & →0x21 + →0x32 - →0x33 * →0x44 | |
// The +0x20 bias keeps every packed byte printable so the table can be written
// as a string: '0'=0x30, 'A'=0x41, 'R'=0x52, 'S'=0x53, 'd'=0x64.
| op_table: .ascii "|0&A+R-S*d\0" | |
| // ══════════════════════════════════════════════════════════════════════════ | |
| // Code | |
| // ══════════════════════════════════════════════════════════════════════════ | |
| .text | |
| .global _start | |
| // ────────────────────────────────────────────────────────────────────────── | |
| // _start — entry point | |
| // ────────────────────────────────────────────────────────────────────────── | |
// Flow: validate argc, open and read the source, run pass 1, lay out the
// sections, rebase symbols, run pass 2, compress .text, build the ELF header
// and program header on the stack, then write every piece of the output file.
//
// Registers pinned here and relied on by the rest of the file:
//   x28 = &state, x29 = 0x100000 (1 MB stride), x27 = text_buf
// Later in this routine: x19 = output fd, x20 = compressed stream size,
//   x21 = rodata size, x11 = page-rounded file size, x12/x13 = p_filesz/p_memsz.
// Exits through exit_common (status = fchmod result) or an err_* path.
| _start: | |
| // grab argc / argv from the stack | |
| ldr x0, [sp] // argc | |
| cmp x0, #3 | |
| b.lt err_usage | |
| // set up state block pointer (x28 is callee-saved, lives forever) | |
| adr x28, state | |
| // pin x29 = 0x100000 (1 MB stride between section buffers) | |
| movz x29, #0x10, lsl #16 | |
| // pin x27 = text_buf (x28 + INPUT_BUF_OFF + INPUT_BUF_SIZE; x29 == INPUT_BUF_SIZE) | |
| add x27, x28, x29 | |
| add x27, x27, #INPUT_BUF_OFF | |
| // store input/output filenames | |
| ldp x1, x0, [sp, #16] // x1=argv[1] (input), x0=argv[2] (output) | |
| stp x1, x0, [x28, #ST_INPUT_NAME] | |
| // ── open and read the input file ────────────────────────────────────── | |
| // x1 already holds input filename from ldp above | |
| mov x0, #AT_FDCWD | |
| mov x2, #O_RDONLY | |
| mov x8, #SYS_openat | |
| svc #0 | |
| tbnz x0, #63, err_open | |
// x0 = input fd, passed straight to read. A single read is issued, so the
// whole file is assumed to arrive in one call and to fit in INPUT_BUF_SIZE.
// The fd is not closed afterwards in this routine.
| add x1, x28, #INPUT_BUF_OFF // input_buf | |
| mov x2, #INPUT_BUF_SIZE | |
| mov x8, #SYS_read | |
| svc #0 | |
| tbnz x0, #63, err_open | |
| str x0, [x28, #ST_INPUT_LEN] | |
| // ── pass 1: collect symbols and measure sections ────────────────────── | |
| mov x0, #1 | |
| bl run_pass | |
| // ── compute section base addresses ──────────────────────────────────── | |
| ldp x1, x2, [x28, #ST_TEXT_POS] // text_pos, rodata_pos | |
| mov x0, #CODE_START | |
| add x1, x0, x1 // rodata_base = text_base + text_size | |
| stp x0, x1, [x28, #ST_TEXT_BASE] | |
| add x2, x1, x2 // bss_base = rodata_base + rodata_size | |
| str x2, [x28, #ST_BSS_BASE] | |
| ldr x3, [x28, #ST_BSS_POS] | |
| add x3, x2, x3 // mem_size = bss_base + bss_size | |
// One stp fills both ST_FILE_SIZE (= bss_base) and ST_MEM_SIZE.
| stp x2, x3, [x28, #ST_FILE_SIZE] | |
| // ── rebase symbols: add section bases to label addresses ────────────── | |
| bl rebase_symbols | |
| // ── pass 2: encode instructions and emit data ───────────────────────── | |
| mov x0, #2 | |
| bl run_pass | |
| // ── compress .text section ──────────────────────────────────────────── | |
| bl compress_text | |
| mov x20, x0 // x20 = compressed stream size | |
| // ── allocate stack: 128 (ELF header) ────────────────────────────────── | |
// Only the first CODE_START (120) bytes are written to the file; 128 keeps
// sp 16-byte aligned.
| sub sp, sp, #128 | |
| // p_filesz = CODE_START + STUB_SIZE + FULL_DICT_SIZE + HALF_DICT_SIZE + stream + rodata | |
| ldr x21, [x28, #ST_RODATA_POS] // x21 = rodata_size (callee-saved) | |
| add x12, x20, x21 // stream + rodata | |
| add x12, x12, #(CODE_START + STUB_SIZE + FULL_DICT_SIZE + HALF_DICT_SIZE) | |
| // DECOMP_DEST_OFF = ceil_page(p_filesz) — offset from stub base | |
| add x11, x12, #0xFFF | |
| and x11, x11, #0xFFFFFFFFFFFFF000 // ceil to page | |
| // p_memsz = ceil_page(p_filesz) + total_mem_size | |
| ldr x13, [x28, #ST_MEM_SIZE] | |
| add x13, x13, x11 | |
| // ELF magic + e_ident[0..7] + zeros [8..15] | |
// Bytes in file order: 7F 'E' 'L' 'F', class=2 (64-bit), data=1 (LSB),
// version=1, OS ABI=0.
| movz x9, #0x457f | |
| movk x9, #0x464c, lsl #16 | |
| movk x9, #0x0102, lsl #32 | |
| movk x9, #0x0001, lsl #48 | |
| stp x9, xzr, [sp] | |
| // e_type=3, e_machine=0xB7, e_version=1 + e_entry=0x78 (stub entry) | |
| movz x9, #3 | |
| movk x9, #0x00B7, lsl #16 | |
| movk x9, #1, lsl #32 | |
| mov x10, #CODE_START // e_entry = stub at 0x78 | |
| stp x9, x10, [sp, #16] | |
| // e_phoff=64 + e_shoff=0 | |
| mov x9, #64 | |
| stp x9, xzr, [sp, #32] | |
| // e_flags=0|e_ehsize=64|e_phentsize=56 + e_phnum=1|rest=0 | |
| lsl x9, x9, #32 // x9 was 64 → 0x0040_0000_0000 | |
| movk x9, #0x0038, lsl #48 | |
| mov x10, #1 | |
| stp x9, x10, [sp, #48] | |
| // p_type=1|p_flags=7 + p_offset=0 | |
// x10 still holds 1 from e_phnum; the movk adds p_flags in the upper word.
| movk x10, #7, lsl #32 | |
| stp x10, xzr, [sp, #64] | |
| // p_vaddr=0, p_paddr=0 | |
| stp xzr, xzr, [sp, #80] | |
| // p_filesz + p_memsz (compressed values) | |
| stp x12, x13, [sp, #96] | |
| // p_align = 0x10000 | |
| mov x9, #0x10000 | |
| str x9, [sp, #112] | |
| // ── open output file ────────────────────────────────────────────────── | |
| mov x0, #AT_FDCWD | |
| ldr x1, [x28, #ST_OUTPUT_NAME] | |
| mov x2, #O_WRONLY_CREAT_TRUNC | |
| mov w3, #493 // 0755 octal | |
| mov x8, #SYS_openat | |
| svc #0 | |
| tbnz x0, #63, err_create | |
| mov x19, x0 // fd | |
| // write ELF header + program header (120 bytes) | |
// x8 is set to SYS_write once and reused by every svc_x19 call below, until
// it is replaced by SYS_fchmod. The results of these writes are not checked.
| mov x8, #SYS_write | |
| mov x1, sp | |
| mov x2, #CODE_START | |
| bl svc_x19 | |
| // header buffer no longer needed — reuse sp[0..7] for stub data (32-bit) | |
// w11 = page-rounded file size (decompression destination offset),
// w21 = rodata size.
| stp w11, w21, [sp] | |
| // write decompressor stub code (from .text, excludes data block) | |
| adr x1, _decomp_stub_start | |
| mov x2, #STUB_DATA_DECOMP_DEST | |
| bl svc_x19 | |
| // write patched stub data block (from stack) | |
| mov x1, sp | |
| mov x2, #(STUB_SIZE - STUB_DATA_DECOMP_DEST) | |
| bl svc_x19 | |
| // write full_dict + half_dict (adjacent in memory) | |
| adr x1, full_dict | |
| mov x2, #(FULL_DICT_SIZE + HALF_DICT_SIZE) | |
| bl svc_x19 | |
| // write compressed stream | |
// The stream is read from input_buf (x28 + INPUT_BUF_OFF), length x20.
| add x1, x28, #INPUT_BUF_OFF | |
| mov x2, x20 | |
| bl svc_x19 | |
| // write .rodata section | |
// x27 + x29 = text_buf + 1 MB = rodata_buf.
| add x1, x27, x29 | |
| mov x2, x21 // rodata_size (saved in x21) | |
| bl svc_x19 | |
| // fchmod to make executable (returns 0 on success = our exit code) | |
| mov x1, #493 | |
| mov x8, #SYS_fchmod | |
| bl svc_x19 | |
| b exit_common | |
| // ────────────────────────────────────────────────────────────────────────── | |
| // Error exits | |
| // ────────────────────────────────────────────────────────────────────────── | |
// err_*:       load a NUL-terminated message into x1 and join die_msg
//              (err_create falls through into it).
// die_msg:     x1 = message. Writes it to stderr and exits with status 1.
//              Also the tail of error_at.
// exit_common: x0 = exit status. Issues SYS_exit; does not return.
| err_usage: | |
| adr x1, msg_usage | |
| b die_msg | |
| err_open: | |
| adr x1, msg_open | |
| b die_msg | |
| err_create: | |
| adr x1, msg_create | |
| die_msg: | |
| bl strlen_x1 | |
| bl write2 | |
| mov x0, #1 | |
| exit_common: | |
| mov x8, #SYS_exit | |
| svc #0 | |
// svc_x19 — issue the syscall whose number is already in x8, with x0 = x19
//   in:  x19 = first syscall argument (the output fd in _start),
//        x1, x2 = remaining arguments, x8 = syscall number
//   out: x0 = raw syscall result (not checked here)
| svc_x19: | |
| mov x0, x19 | |
| svc #0 | |
| ret | |
// write2 — write x2 bytes starting at x1 to stderr
//   in:  x1 = buffer, x2 = length
//   out: x0 = raw syscall result; x8 is left holding SYS_write
| write2: | |
| mov x0, #STDERR | |
| mov x8, #SYS_write | |
| svc #0 | |
| ret | |
| // strlen_x1 — compute length of null-terminated string in x1 → x2 | |
//   in:  x1 = string (left unchanged)
//   out: x2 = number of bytes before the terminating NUL
//   clobbers: w10
// x2 starts at -1 so the first pass through the loop examines index 0.
| strlen_x1: | |
| mov x2, #-1 | |
| 1: add x2, x2, #1 | |
| ldrb w10, [x1, x2] | |
| cbnz w10, 1b | |
| ret | |
| // ══════════════════════════════════════════════════════════════════════════ | |
| // Utility functions (spec §8.5) | |
| // | |
| // Calling convention: args in x0-x7, return in x0 (x1 for pairs). | |
| // Leaf functions — no stack frame needed. | |
| // ══════════════════════════════════════════════════════════════════════════ | |
| // ────────────────────────────────────────────────────────────────────────── | |
| // skip_ws — advance pointer past whitespace (any non-NUL byte <= ' ') | |
| // x0 = pointer | |
| // returns x0 = first non-whitespace position | |
| // ────────────────────────────────────────────────────────────────────────── | |
// Also returns w9 = the byte at the returned position (0 at end of line);
// several callers depend on this instead of reloading the byte.
// skip1_ws is the same routine entered one byte later, used to step over a
// single delimiter before skipping whitespace.
| skip1_ws: | |
| add x0, x0, #1 | |
| skip_ws: | |
| 1: ldrb w9, [x0] | |
| cbz w9, 2f | |
| cmp w9, #' ' | |
// csinc keeps x0 when the byte is above ' ' and otherwise yields x0 + 1;
// b.ls then repeats only in the "advanced" case.
| csinc x0, x0, x0, hi | |
| b.ls 1b | |
| 2: ret | |
// Thin entry points around skip_ws / parse_register. Each copies a fixed
// register into x0 and tail-branches, so the result contract is skip_ws's
// (x0 = position, w9 = byte there). The two helpers that make a nested call
// park the return address in x16 rather than on the stack, so x16 must not be
// live in their callers.
//
// ws_x2_skip1: skip whitespace at x2, step over one byte, skip whitespace
//              again.
| ws_x2_skip1: | |
| mov x16, x30 | |
| bl ws_x2 | |
| mov x30, x16 | |
| b skip1_ws | |
| ws_x1: | |
| mov x0, x1 | |
| b skip_ws | |
| ws_x19: | |
| mov x0, x19 | |
| b skip_ws | |
| ws_x2: | |
| mov x0, x2 | |
| b skip_ws | |
| ws_x21: | |
| mov x0, x21 | |
| b skip_ws | |
// ws_x21_parse_reg: skip whitespace at x21, then parse a register there.
// Returns whatever parse_register returns.
| ws_x21_parse_reg: | |
| mov x16, x30 | |
| bl ws_x21 | |
| mov x30, x16 | |
| b parse_register | |
| // ────────────────────────────────────────────────────────────────────────── | |
| // decode_escape — decode backslash escape character | |
| // w9 = char after backslash; returns w9 = decoded character | |
| // ────────────────────────────────────────────────────────────────────────── | |
// Mappings: '0' → 0, 'n' → 10, 't' → 9. Any other character is returned
// unchanged, which is what makes \\, \" and \' work.
// Branch-free: each test is a cmp + csel. Clobbers w10 and the flags.
| decode_escape: | |
| cmp w9, #'0' | |
| csel w9, wzr, w9, eq | |
| cmp w9, #'n' | |
| mov w10, #10 | |
| csel w9, w10, w9, eq | |
| cmp w9, #'t' | |
| mov w10, #9 | |
| csel w9, w10, w9, eq | |
| ret | |
| // ────────────────────────────────────────────────────────────────────────── | |
| // parse_int — parse decimal, hex, or character literal | |
| // x0 = pointer (at first character of the number) | |
| // returns x0 = value, x1 = pointer past the parsed number | |
| // | |
| // formats: 123 -42 0x1F 0xFF 'A' '\n' | |
| // ────────────────────────────────────────────────────────────────────────── | |
// Clobbers w9, w10, x11 (sign flag), x12 (accumulator); the character-literal
// path also uses x16 to hold the return address around decode_escape.
// There is no overflow or "no digits" detection: an empty digit run yields 0
// with x1 pointing where parsing stopped.
| parse_int: | |
| ldrb w9, [x0] | |
| // character literal? | |
| cmp w9, #'\'' | |
| b.eq parse_int_char | |
| // negative? | |
| cmp w9, #'-' | |
| csinc x0, x0, x0, ne // advance past '-' if negative | |
| cset x11, eq // sign flag: 1 if '-', else 0 | |
| ldrb w9, [x0] // reload current char | |
| // hex prefix? | |
| cmp w9, #'0' | |
| b.ne parse_int_dec | |
| ldrb w10, [x0, #1] | |
| orr w10, w10, #0x20 | |
| cmp w10, #'x' | |
| b.eq parse_int_hex | |
| parse_int_dec: | |
| mov x12, #0 // accumulator | |
| 2: ldrb w9, [x0] | |
| sub w10, w9, #'0' | |
| cmp w10, #9 | |
| b.hi parse_int_done | |
| add x12, x12, x12, lsl #2 // x12 * 5 | |
| add x12, x10, x12, lsl #1 // digit + x12*10 | |
| add x0, x0, #1 | |
| b 2b | |
| parse_int_hex: | |
| add x0, x0, #2 // skip "0x" | |
| mov x12, #0 | |
| 3: ldrb w9, [x0] | |
| sub w10, w9, #'0' | |
| cmp w10, #9 | |
| b.ls 4f | |
| orr w10, w9, #0x20 // fold uppercase to lowercase | |
| sub w10, w10, #'a' | |
| cmp w10, #5 | |
| b.hi parse_int_done | |
| add w10, w10, #10 | |
| 4: add x12, x10, x12, lsl #4 | |
| add x0, x0, #1 | |
| b 3b | |
| parse_int_done: | |
| cbz x11, parse_int_ret | |
| neg x12, x12 | |
| parse_int_ret: | |
| mov x1, x0 | |
| mov x0, x12 | |
| ret | |
// Character literal: 'c' or '\c'. The closing quote is skipped without being
// checked, and this path bypasses the sign handling above.
| parse_int_char: | |
| add x0, x0, #1 // skip opening quote | |
| ldrb w9, [x0], #1 // load char, advance past it | |
| cmp w9, #'\\' | |
| b.ne 1f | |
| // escape: x0 is past backslash already | |
| ldrb w9, [x0], #1 // load escape char, advance past it | |
| mov x16, x30 | |
| bl decode_escape | |
| mov x30, x16 | |
| 1: mov x12, x9 | |
| add x0, x0, #1 // skip closing quote | |
| b parse_int_ret | |
| // ────────────────────────────────────────────────────────────────────────── | |
| // parse_ident — parse an identifier [a-zA-Z_][a-zA-Z0-9_]* | |
| // x0 = pointer | |
| // returns x0 = start of ident, x1 = length, x2 = pointer past ident | |
| // if no valid identifier, x1 = 0 | |
| // ────────────────────────────────────────────────────────────────────────── | |
// Clobbers x9 (saved start), w10 (current byte), w11 (scratch).
// Loop shape: the first byte enters at pi_check_first, so it skips the digit
// test; every later byte enters at 1: and is tested for a digit first.
// Letters are matched case-insensitively by OR-ing 0x20 before the range test.
| parse_ident: | |
| mov x9, x0 // start | |
| ldrb w10, [x0], #1 | |
| b pi_check_first | |
| 1: ldrb w10, [x0], #1 | |
| // loop: accept digits (not valid for first char) | |
| sub w11, w10, #'0' | |
| cmp w11, #9 | |
| b.ls 1b | |
| pi_check_first: | |
| // accept underscore and letters | |
| cmp w10, #'_' | |
| b.eq 1b | |
| orr w11, w10, #0x20 | |
| sub w11, w11, #'a' | |
| cmp w11, #25 | |
| b.ls 1b | |
| // end of identifier (or not an identifier if x0 == x9) | |
| sub x2, x0, #1 // end pointer (x0 is one past due to post-index) | |
| sub x1, x2, x9 // length (0 if no ident) | |
| mov x0, x9 // start | |
| ret | |
| // ────────────────────────────────────────────────────────────────────────── | |
| // parse_register — parse register name | |
| // x0 = pointer | |
| // returns x0 = reg number (0-31), x1 = is_64bit, x2 = pointer past | |
| // on error: x0 = -1 | |
| // ────────────────────────────────────────────────────────────────────────── | |
// Precondition: w9 = byte at [x0]. The x/w test below uses w9 without loading
// it, so callers arrive here straight from skip_ws.
// Accepted spellings: sp (→ 31, 64-bit), xzr/wzr (→ 31), x0-x30, w0-w30.
// Only "sp" is checked for a following identifier character; after xzr/wzr or
// a register number the next byte is not inspected.
// Failure paths branch to parse_reg_fail, which is outside this excerpt.
// Clobbers w10-w13.
| parse_register: | |
| // w9 pre-loaded by caller (skip_ws sets w9 = first non-ws char) | |
| // sp? | |
| ldrh w10, [x0] | |
| movz w11, #0x7073 // 'sp' in little-endian | |
| cmp w10, w11 | |
| b.ne 1f | |
| // make sure it's not a longer ident (e.g. "spaghetti") | |
| ldrb w10, [x0, #2] | |
| orr w11, w10, #0x20 | |
| sub w11, w11, #'a' | |
| cmp w11, #25 | |
| b.ls 1f | |
| sub w11, w10, #'0' | |
| cmp w11, #9 | |
| b.ls 1f | |
| cmp w10, #'_' | |
| b.eq 1f | |
| add x2, x0, #2 // end pointer (before clobbering x0) | |
| mov x0, #31 | |
| mov x1, #1 | |
| ret | |
| 1: cmp w9, #'x' | |
| cset x1, eq // x1=1 if 'x' (64-bit), else 0 | |
| b.eq parse_reg_xw | |
| cmp w9, #'w' | |
| b.ne parse_reg_fail | |
| parse_reg_xw: | |
| // check for xzr/wzr — load 4 bytes, extract bytes 1-2 as 16-bit LE | |
| ldr w10, [x0] | |
| ubfx w10, w10, #8, #16 | |
| movz w11, #0x727A // 'z' | ('r' << 8) in little-endian | |
| cmp w10, w11 | |
| b.ne parse_reg_num | |
| add x2, x0, #3 | |
| mov x0, #31 | |
| ret | |
| parse_reg_num: | |
| // x1 = is_64bit from cset above; not modified by this code | |
| ldrb w12, [x0, #1] // first digit | |
| sub w12, w12, #'0' | |
| cmp w12, #9 | |
| b.hi parse_reg_fail | |
| ldrb w10, [x0, #2] | |
| sub w11, w10, #'0' | |
| cmp w11, #9 | |
| add x2, x0, #2 // end pointer (single digit); flags unaffected | |
| b.hi 1f // single digit | |
| add w13, w12, w12, lsl #2 // first * 5 | |
| add w12, w11, w13, lsl #1 // second + first * 10 | |
| add x2, x2, #1 | |
// 31 is reachable only through the sp / xzr / wzr spellings above.
| 1: cmp w12, #30 | |
| b.hi parse_reg_fail | |
| mov x0, x12 | |
| ret | |
| // ────────────────────────────────────────────────────────────────────────── | |
| // sym_lookup — find a symbol in the hash table | |
| // x0 = name pointer, x1 = name length | |
| // returns x0 = pointer to entry, x1 = 1 if found (0 if empty slot) | |
| // | |
| // uses x27 (text_buf) and x29 (1 MB stride) to locate sym_table | |
| // ────────────────────────────────────────────────────────────────────────── | |
// Side outputs relied on by sym_define: x15 = name pointer, x16 = name length.
// Clobbers x9-x14 and x17; x2 and x3 are not touched.
// When x1 = 0 the returned entry is the empty slot where the name would be
// inserted. Collisions are resolved by stepping to the next slot, wrapping at
// SYM_TBL_SLOTS. The probe loop has no iteration limit: it ends only on a
// match or an empty slot.
| sym_lookup: | |
| // leaf function — no frame needed, uses scratch registers only | |
| mov x15, x0 // name ptr | |
| mov x16, x1 // name len | |
| // hash the name (inlined djb2) | |
// h = 5381; for each byte, last to first: h = h * 33 + byte.
| mov x9, #5381 | |
| cbz x1, 2f | |
| 1: sub x1, x1, #1 | |
| ldrb w12, [x0, x1] | |
| add x9, x9, x9, lsl #5 | |
| add x9, x9, x12 | |
| cbnz x1, 1b | |
| // slot = hash & (SYM_TBL_SLOTS - 1) | |
| 2: and x17, x9, #(SYM_TBL_SLOTS - 1) | |
| add x14, x27, x29, lsl #1 // sym_table = text_buf + 2*1MB | |
| sym_lookup_probe: | |
| // entry = &sym_table[slot * 32] | |
| add x13, x14, x17, lsl #5 // entry pointer in x13 | |
| // check if slot is empty (name_ptr == NULL) | |
| ldr x12, [x13, #SYM_NAME_PTR] | |
| cbz x12, sym_lookup_empty | |
| // compare name_len | |
| ldr w11, [x13, #SYM_NAME_LEN] | |
| cmp w11, w16 | |
| b.ne sym_lookup_next | |
| // compare name bytes (x12 = direct pointer into input buffer) | |
// Compared from the last byte down to index 0.
| mov x0, x16 // counter | |
| 1: sub x0, x0, #1 | |
| ldrb w9, [x12, x0] | |
| ldrb w10, [x15, x0] | |
| cmp w9, w10 | |
| b.ne sym_lookup_next | |
| cbnz x0, 1b | |
| mov x1, #1 | |
| b sym_lookup_ret | |
| sym_lookup_next: | |
| add x17, x17, #1 | |
| and x17, x17, #(SYM_TBL_SLOTS - 1) | |
| b sym_lookup_probe | |
| sym_lookup_empty: | |
| mov x1, #0 | |
| sym_lookup_ret: | |
| mov x0, x13 | |
| ret | |
| // ────────────────────────────────────────────────────────────────────────── | |
| // sym_define — insert or update a symbol | |
| // x0 = name pointer, x1 = name length, x2 = value, x3 = flags | |
| // | |
| // if the symbol already exists, updates value and flags (OR'd). | |
| // if new, stores direct name pointer from input buffer. | |
| // ────────────────────────────────────────────────────────────────────────── | |
// The value is ALWAYS overwritten with x2, for existing entries too; only the
// flags are merged. A caller that wants to add a flag without changing the
// value must pass the current value back in.
// Returns x0 = entry pointer. Saves and restores x19; clobbers everything
// sym_lookup clobbers.
| sym_define: | |
| stp x19, x30, [sp, #-16]! | |
| mov x19, x2 // value (callee-saved) | |
| // x3 = flags (preserved across sym_lookup — leaf, doesn't touch x3) | |
| bl sym_lookup | |
| // x0 = entry, x15 = name ptr, x16 = name len (set by sym_lookup) | |
| cbnz x1, sym_define_update | |
| // ── new entry: store name pointer directly ──────────────────────────── | |
| str x15, [x0, #SYM_NAME_PTR] | |
| str w16, [x0, #SYM_NAME_LEN] | |
| sym_define_update: | |
| str x19, [x0, #SYM_VALUE] | |
| // OR in flags (don't clobber existing bits) | |
| ldr x9, [x0, #SYM_FLAGS] | |
| orr x9, x9, x3 | |
| str x9, [x0, #SYM_FLAGS] | |
| ldp x19, x30, [sp], #16 | |
| ret | |
| // ────────────────────────────────────────────────────────────────────────── | |
| // error_at — print "filename:line: msg\n" to stderr and exit(1) | |
| // x0 = message pointer (null-terminated) | |
| // | |
| // uses state block for filename and line number | |
| // ────────────────────────────────────────────────────────────────────────── | |
// Does not return: it ends in die_msg, which exits with status 1. Because of
// that it overwrites x19 and the caller's stack bytes without saving them.
// The ":<line>: " text is assembled in place on the stack. Digits are stored
// downward from sp+43, a ':' is placed in front of them, and ": " is stored
// at sp+44..45.
| error_at: | |
| mov x19, x0 // msg ptr | |
| // write filename | |
| ldr x1, [x28, #ST_INPUT_NAME] | |
| bl strlen_x1 | |
| bl write2 | |
| // build ":[linenum]: " in frame buffer and write it | |
| ldr x9, [x28, #ST_LINE_NUM] | |
| add x11, sp, #44 | |
| mov x10, x11 | |
| mov x12, #10 | |
// Emit decimal digits least-significant first: x13 = quotient,
// x14 = remainder.
| 3: udiv x13, x9, x12 | |
| msub x14, x13, x12, x9 | |
| add w14, w14, #'0' | |
| strb w14, [x10, #-1]! | |
| mov x9, x13 | |
| cbnz x9, 3b | |
| mov w14, #':' | |
| strb w14, [x10, #-1]! | |
// 0x203A stored as a little-endian halfword is the two bytes ':' ' '.
| movz w14, #0x203A | |
| strh w14, [x11] | |
| mov x1, x10 | |
| sub x2, x11, x10 | |
| add x2, x2, #2 | |
| bl write2 | |
| // write message (includes \n) and exit | |
| mov x1, x19 | |
| b die_msg | |
| // ══════════════════════════════════════════════════════════════════════════ | |
| // Pass driver and line processing | |
| // ══════════════════════════════════════════════════════════════════════════ | |
| // ────────────────────────────────────────────────────────────────────────── | |
| // run_pass — iterate over all source lines | |
| // x0 = pass number (1 or 2) | |
| // ────────────────────────────────────────────────────────────────────────── | |
// Register roles inside the loop:
//   x19 = start of current line   x20 = end of input
//   x21 = line number             x22 = end of current line
//   w23 = byte temporarily replaced by NUL
// The prologue is identical to process_line's, so this routine returns by
// branching to process_line's pl_done epilogue. Keep the two frames in sync.
| run_pass: | |
| stp x30, x19, [sp, #-64]! | |
| stp x20, x21, [sp, #16] | |
| stp x22, x23, [sp, #32] | |
| str x0, [x28, #ST_PASS] | |
| // reset section positions and current section | |
// Two stp's clear four consecutive u64s: the text, rodata and bss positions
// and ST_CUR_SEC, so every pass starts in SEC_TEXT (0).
| stp xzr, xzr, [x28, #ST_TEXT_POS] | |
| stp xzr, xzr, [x28, #ST_BSS_POS] | |
| // reset line number (x21 = line counter, synced to state before process_line) | |
| mov x21, #1 | |
| // reset numeric label cursors (inline zero fill) | |
| add x0, x28, #NUMLAB_CURS_OFF | |
| mov x2, #(NUMLAB_DIGITS * 8) | |
| 1: sub x2, x2, #1 | |
| strb wzr, [x0, x2] | |
| cbnz x2, 1b | |
| // set up input pointers | |
| add x19, x28, #INPUT_BUF_OFF // input_buf | |
| ldr x9, [x28, #ST_INPUT_LEN] | |
| add x20, x19, x9 // x20 = end of input | |
| run_pass_loop: | |
| cmp x19, x20 | |
| b.ge pl_done | |
| // find end of line (newline or end of buffer) | |
| mov x22, x19 | |
| 1: cmp x22, x20 | |
| b.ge 2f | |
| ldrb w10, [x22], #1 | |
| cmp w10, #'\n' | |
| b.ne 1b | |
| sub x22, x22, #1 // back up to newline | |
| 2: | |
| // temporarily null-terminate | |
// For a final line with no trailing newline, x22 == x20, so the NUL is
// written one byte past the data that was read.
| ldrb w23, [x22] | |
| strb wzr, [x22] | |
| // process the line | |
| str x21, [x28, #ST_LINE_NUM] | |
| mov x0, x19 | |
| bl process_line | |
| // restore original byte | |
| strb w23, [x22] | |
| // advance past newline | |
| add x19, x22, #1 | |
| add x21, x21, #1 | |
| b run_pass_loop | |
| // ────────────────────────────────────────────────────────────────────────── | |
| // process_line — handle one null-terminated source line | |
| // x0 = line start (null-terminated) | |
| // ────────────────────────────────────────────────────────────────────────── | |
// A line is any number of labels ("name:" or "N:"), optionally followed by one
// directive or one instruction, optionally followed by a // comment.
// x19 = current parse position; x20-x23 are scratch for the handlers and are
// restored at pl_done. The 64-byte frame layout must match run_pass, which
// also returns through pl_done.
| process_line: | |
| stp x30, x19, [sp, #-64]! | |
| stp x20, x21, [sp, #16] | |
| stp x22, x23, [sp, #32] | |
| bl skip_ws | |
| mov x19, x0 | |
// Re-entered after each label so that "label: insn" and stacked labels work.
| pl_check_content: | |
| // empty line? (w9 pre-loaded by skip_ws / ws_x19) | |
| cbz w9, pl_done | |
| // comment? ( // ) | |
| ldrh w10, [x19] | |
| movz w11, #0x2F2F // "//" in little-endian | |
| cmp w10, w11 | |
| b.eq pl_done | |
| // ── check for numeric label (digit followed by ':') ─────────────────── | |
| // w10 = ldrh from [x19]; for "N:", w10 = 0x3A30..0x3A39 | |
// Subtracting 0x3A30 leaves the digit value 0-9 exactly when the two bytes
// are a digit followed by ':'.
| movz w11, #0x3A30 // ':' << 8 | '0' | |
| sub w10, w10, w11 | |
| cmp w10, #9 | |
| b.hi pl_not_numlab | |
| // numeric label — record in pass 1 | |
| mov x0, x10 // digit (0-9) | |
| bl handle_numlab | |
| add x19, x19, #2 | |
| b pl_after_label | |
| pl_not_numlab: | |
| // ── check for named label or mnemonic ───────────────────────────────── | |
| cmp w9, #'.' | |
| b.eq pl_directive | |
| mov x0, x19 | |
| bl parse_ident | |
| cbz x1, pl_done // no identifier → skip | |
| // is it a label (followed by ':')? | |
// The ':' must follow the name immediately; whitespace is not skipped here.
| ldrb w9, [x2] | |
| cmp w9, #':' | |
| b.ne pl_instruction | |
| // ── named label ─────────────────────────────────────────────────────── | |
| add x19, x2, #1 // past ':' | |
| // only define in pass 1 (pass 2 uses rebased values) | |
| ldr x9, [x28, #ST_PASS] | |
| tbnz x9, #1, pl_after_label | |
| // value = current section offset | |
| ldr x11, [x28, #ST_CUR_SEC] | |
| ldr x2, [x28, x11] | |
| // flags = DEFINED | (cur_section << SEC_SHIFT); x11 = sec*8 | |
// The section id is already multiplied by 8, so one more doubling puts the
// section index in bits 5:4.
| lsl x3, x11, #1 | |
| orr x3, x3, #SYMF_DEFINED | |
// x0/x1 still hold the name start and length returned by parse_ident.
| bl sym_define | |
| pl_after_label: | |
| bl ws_x19 | |
| mov x19, x0 | |
| b pl_check_content | |
| // ── directive (starts with '.') ─────────────────────────────────────── | |
// Entry: x19 = address of the '.', inside process_line's frame.
// Dispatch is by the first character of the directive name, refined by the
// name length or a later character. It is not a full string compare:
//   'b…'              → select SEC_BSS
//   's…' of length 7  → select SEC_RODATA (".section"; its operand is not read)
//   's…' otherwise    → dir_skip
//   'a…' with [4]=='n'→ dir_align, otherwise dir_str_common (.ascii/.asciz)
//   'e…'              → .equ name, expr (handled inline below)
//   'g…'              → .global name    (handled inline below)
//   anything else     → dir_word, which goes on to test 'w' and then 't'
// Every path ends at pl_done, directly or through the handler it branches to.
| pl_directive: | |
| add x0, x19, #1 // skip '.' | |
| bl parse_ident | |
| cbz x1, pl_done | |
| // x0 = name start, x1 = name length, x2 = end pointer | |
| mov x20, x0 // directive name | |
| mov x21, x1 // directive length | |
| mov x19, x2 // position after directive name | |
| // dispatch on directive name — check first char then length | |
| ldrb w9, [x20] | |
| cmp w9, #'b' | |
| mov x10, #SEC_BSS // doesn't affect flags | |
| b.eq dir_sec_set | |
| cmp w9, #'s' | |
| b.ne 5f | |
| cmp x21, #7 | |
| b.ne dir_skip | |
| mov x10, #SEC_RODATA | |
| b dir_sec_set | |
| 5: cmp w9, #'a' | |
| b.ne 6f | |
// w10 = fifth character of the name; dir_str_common reads it again to tell
// .asciz ('z') from .ascii ('i').
| ldrb w10, [x20, #4] | |
| cmp w10, #'n' | |
| b.eq dir_align | |
| b dir_str_common | |
| 6: cmp w9, #'e' | |
| b.ne 7f | |
| // inline dir_equ: | |
// Runs in both passes. Pass 2 re-evaluates the expression, so a value that
// referred to a symbol not yet defined in pass 1 is corrected here.
| bl ws_x19 | |
| bl parse_ident | |
| cbz x1, pl_done | |
| mov x20, x0 | |
| mov x21, x1 | |
| bl ws_x2_skip1 | |
| bl parse_expr0 | |
| mov x2, x0 | |
| mov x3, #(SYMF_DEFINED | SYMF_EQU) | |
| mov x0, x20 | |
| mov x1, x21 | |
| bl sym_define | |
| b pl_done | |
| 7: cmp w9, #'g' | |
| b.ne dir_word | |
| // inline dir_global: | |
// Only the SYMF_GLOBAL flag may change here. sym_define always overwrites the
// stored value, and this directive runs in both passes while labels are
// defined in pass 1 only. Passing a literal 0 therefore erased the address of
// any label that had already been defined (and, in pass 2, rebased). Fetch the
// current value first and hand it back so it is preserved; a slot that has
// never been written reads as 0, which matches the old behaviour for a symbol
// that is not defined yet.
| bl ws_x19 | |
| bl parse_ident | |
| cbz x1, pl_done | |
| mov x20, x0 | |
| mov x21, x1 | |
| bl sym_lookup | |
| ldr x2, [x0, #SYM_VALUE] | |
| mov x0, x20 | |
| mov x1, x21 | |
| mov x3, #SYMF_GLOBAL | |
| bl sym_define | |
| b pl_done | |
| // ── instruction ─────────────────────────────────────────────────────── | |
// Reached from process_line when the identifier is not followed by ':'.
// Pass 2 (bit 1 of ST_PASS set) tail-branches to encode_instruction; pass 1
// goes straight to emit_inst_done. Both targets are outside this excerpt.
| pl_instruction: | |
| // x0 = mnemonic start, x1 = mnemonic length, x2 = position after | |
| ldr x9, [x28, #ST_PASS] | |
| tbnz x9, #1, encode_instruction | |
| // pass 1: advance text pos by 4 (write garbage to buf — overwritten in pass 2) | |
| b emit_inst_done | |
// Tail of the directive dispatch. w9 still holds the first character of the
// directive name on entry.
//   dir_word:    'w' → .word. Anything else falls to dir_text.
//   dir_text:    't' → select SEC_TEXT. Any other directive is ignored
//                silently.
//   dir_sec_set: x10 = section id to store in ST_CUR_SEC.
//   pl_done:     shared epilogue for process_line and run_pass.
| dir_word: | |
| cmp w9, #'w' | |
| b.ne dir_text | |
| // .word <expr> — emit 4-byte little-endian value | |
| bl parse_expr0_x19 | |
| mov x22, x0 // value | |
| // always write to buffer (pass 1 writes are harmless, overwritten in pass 2) | |
| ldr x11, [x28, #ST_CUR_SEC] | |
| ldr x10, [x28, x11] // current pos | |
// x11 is the section id pre-multiplied by 8, so << 17 gives section * 1 MB.
// NOTE(review): for SEC_BSS this computes text_buf + 2 MB, the address
// sym_lookup uses for sym_table. A .word inside .bss would store into the
// symbol table.
| add x0, x27, x11, lsl #17 // text_buf + sec * 1MB | |
| str w22, [x0, x10] // store 4 bytes | |
| mov x0, #4 | |
| b advance_sec_pos | |
| dir_text: | |
| cmp w9, #'t' | |
| mov x10, #SEC_TEXT // doesn't affect flags | |
| b.ne pl_done | |
| dir_sec_set: | |
| str x10, [x28, #ST_CUR_SEC] | |
| pl_done: | |
| ldp x22, x23, [sp, #32] | |
| ldp x20, x21, [sp, #16] | |
| ldp x30, x19, [sp], #64 | |
| ret | |
| // ══════════════════════════════════════════════════════════════════════════ | |
| // Directive handlers | |
| // | |
| // On entry: x19 = parse position after directive name | |
| // x20, x21 available (saved by process_line's frame) | |
| // Must jump to pl_done when finished. | |
| // ══════════════════════════════════════════════════════════════════════════ | |
| // .align N — round the current section's position up to a 2^N boundary; only the position counter is updated, no padding bytes are written here | |
| dir_align: | |
| bl parse_expr0_x19 | |
| // x0 = N (alignment power), evaluated from the text at x19 | |
| mov x10, x0 | |
| ldr x11, [x28, #ST_CUR_SEC] | |
| ldr x0, [x28, x11] // current position (ST_CUR_SEC is used directly as an offset into the state block at x28) | |
| // aligned = (pos + mask) & ~mask where mask = (1<<N)-1 | |
| mov x9, #1 | |
| lsl x9, x9, x10 // 1 << N | |
| sub x9, x9, #1 // mask | |
| add x0, x0, x9 // pos + mask | |
| bic x0, x0, x9 // & ~mask = aligned position | |
| str x0, [x28, x11] | |
| b pl_done | |
| // .skip N — advance the current section's position by N bytes (x0 = N from parse_expr0_x19 is consumed by advance_sec_pos; nothing is written to the buffer) | |
| dir_skip: | |
| bl parse_expr0_x19 | |
| b advance_sec_pos | |
| // .ascii/.asciz "string" — w10 still holds directive[4] ('i' or 'z'); copies the decoded string into the current section and advances its position | |
| dir_str_common: | |
| cmp w10, #'z' | |
| cset x21, eq // null_flag: 1 if asciz, 0 if ascii | |
| bl ws_x19 // x0 = pointer to '"' (assumes ws_x19 skips whitespace starting at x19 — TODO confirm) | |
| // always compute dest buffer (pass 1 writes are harmless, overwritten in pass 2) | |
| ldr x11, [x28, #ST_CUR_SEC] // x0 preserved (only x10/x11/x20 are touched before parse_string) | |
| add x20, x27, x11, lsl #17 // text_buf + sec * 1MB | |
| ldr x10, [x28, x11] | |
| add x20, x20, x10 | |
| mov x1, x20 | |
| bl parse_string // x0 = count, x1 = ptr past | |
| cbz x21, 2f // not asciz: skip null | |
| strb wzr, [x20, x0] // write null terminator right after the copied bytes | |
| 2: add x0, x0, x21 // count + null_flag; falls into advance_sec_pos with x0 = bytes to add | |
| advance_sec_pos: | |
| ldr x11, [x28, #ST_CUR_SEC] | |
| ldr x10, [x28, x11] | |
| add x10, x10, x0 | |
| str x10, [x28, x11] | |
| b pl_done | |
| // ────────────────────────────────────────────────────────────────────────── | |
| // handle_numlab — pass 1: record a numeric label definition; pass 2: advance that digit's definition cursor | |
| // x0 = digit (0-9); clobbers x10-x14 | |
| // ────────────────────────────────────────────────────────────────────────── | |
| handle_numlab: | |
| // x0 = digit (0-9) — leaf function, no frame needed; bit 1 of ST_PASS selects the pass-2 path | |
| ldr x10, [x28, #ST_PASS] | |
| tbnz x10, #1, handle_numlab_p2 | |
| // pass 1: numeric labels are always in .text — ST_TEXT_POS is recorded regardless of the current section | |
| ldr x10, [x28, #ST_TEXT_POS] | |
| // count = numlab_cnts[digit] | |
| add x11, x28, #NUMLAB_CNTS_OFF | |
| ldr x12, [x11, x0, lsl #3] // count | |
| // store address: numlab_defs[digit * MAX_DEFS + count] (8-byte entries, 128 per digit) | |
| adrp x13, numlab_defs | |
| add x13, x13, :lo12:numlab_defs | |
| lsl x14, x0, #7 // digit * 128 | |
| add x14, x14, x12 | |
| str x10, [x13, x14, lsl #3] | |
| // increment count (not checked against the 128 slots per digit: a 129th definition lands in the next digit's range) | |
| add x12, x12, #1 | |
| str x12, [x11, x0, lsl #3] | |
| ret | |
| handle_numlab_p2: | |
| add x11, x28, #NUMLAB_CURS_OFF | |
| ldr x10, [x11, x0, lsl #3] | |
| add x10, x10, #1 | |
| str x10, [x11, x0, lsl #3] | |
| ret | |
| // ────────────────────────────────────────────────────────────────────────── | |
| // rebase_symbols — after pass 1, add section bases to label values: first every named symbol slot, then every recorded numeric-label definition; leaf, clobbers x0 and x9-x17 | |
| // ────────────────────────────────────────────────────────────────────────── | |
| rebase_symbols: | |
| add x9, x27, x29, lsl #1 // sym_table = text_buf + 2*1MB | |
| mov x10, #SYM_TBL_SLOTS | |
| rebase_loop: | |
| ldr x13, [x9, #SYM_NAME_PTR] | |
| cbz x13, rebase_next // empty slot (name pointer is zero) | |
| ldp x14, x17, [x9, #SYM_FLAGS] // flags, value (loaded as one adjacent pair) | |
| tbnz x14, #2, rebase_next // bit 2 = SYMF_EQU, skip: .equ values are left as they are | |
| // extract section from flags bits 5:4 | |
| ubfx x15, x14, #SYMF_SEC_SHIFT, #2 | |
| // base[section] = state[ST_TEXT_BASE + section*8] | |
| add x16, x28, x15, lsl #3 | |
| ldr x16, [x16, #ST_TEXT_BASE] | |
| add x17, x17, x16 | |
| str x17, [x9, #SYM_VALUE] | |
| rebase_next: | |
| add x9, x9, #SYM_ENT_SIZE | |
| sub x10, x10, #1 | |
| cbnz x10, rebase_loop | |
| // x9 now points to numlab_defs (sym_table + SYM_TBL_BYTES), having stepped over all SYM_TBL_SLOTS entries | |
| add x10, x28, #NUMLAB_CNTS_OFF | |
| // numeric labels are always in the text section for now, so one base (ST_TEXT_BASE) is added to all of them | |
| ldr x16, [x28, #ST_TEXT_BASE] | |
| mov x11, #0 // digit | |
| rebase_numlab_digit: | |
| ldr x12, [x10, x11, lsl #3] // count for this digit | |
| lsl x14, x11, #7 // base index = digit * 128 | |
| cbz x12, 5f // skip if count = 0; otherwise walk entries count-1 down to 0 | |
| 4: sub x12, x12, #1 | |
| add x17, x14, x12 | |
| ldr x0, [x9, x17, lsl #3] | |
| add x0, x0, x16 | |
| str x0, [x9, x17, lsl #3] | |
| cbnz x12, 4b | |
| 5: add x11, x11, #1 | |
| cmp x11, #NUMLAB_DIGITS | |
| b.lt rebase_numlab_digit | |
| ret | |
| // ══════════════════════════════════════════════════════════════════════════ | |
| // Expression evaluator — recursive descent | |
| // | |
| // Each function: x0 = pointer → x0 = value, x1 = pointer past expr | |
| // | |
| // Precedence (low to high): | & +/- * <</>> unary(~ -) atom | |
| // ══════════════════════════════════════════════════════════════════════════ | |
| // ────────────────────────────────────────────────────────────────────────── | |
| // parse_expr — Pratt binary expression parser | |
| // x0 = pointer, x1 = min_prec (0 for top-level callers) | |
| // returns x0 = value, x1 = pointer past expr | |
| // | |
| // Entry points: parse_expr0_x19 (pointer taken from x19, min_prec 0), | |
| // parse_expr0 (pointer in x0, min_prec 0), parse_expr (both explicit). | |
| // | |
| // Precedence: | (1) < & (2) < +/- (3) < * (4) < <<,>> (5) | |
| // Every operator is left-associative: the right operand is parsed with | |
| // min_prec = prec + 1, so an operator of equal precedence ends the inner | |
| // call and is picked up again by the outer loop. For that to work the | |
| // position must only move past an operator once it has been accepted; | |
| // a rejected operator is returned to the caller untouched. | |
| // | |
| // The 64-byte frame pushed here is popped by the shared epilogue at pl_done. | |
| // This code relies on skip_ws leaving the pointer in x0 and the character | |
| // at that pointer in w9. | |
| // ────────────────────────────────────────────────────────────────────────── | |
| parse_expr0_x19: | |
| mov x0, x19 | |
| parse_expr0: | |
| mov x1, #0 | |
| parse_expr: | |
| stp x30, x19, [sp, #-64]! | |
| stp x20, x21, [sp, #16] | |
| stp x22, x23, [sp, #32] | |
| mov x22, x1 // min_prec | |
| bl parse_expr_unary | |
| mov x19, x0 // lhs value | |
| mov x20, x1 // current position | |
| // Operator dispatch: x21 encodes (prec<<4)|opcode | |
| // | → 0x10 & → 0x21 + → 0x32 - → 0x33 * → 0x44 << → 0x55 >> → 0x56 | |
| pe_loop: | |
| mov x0, x20 | |
| bl skip_ws | |
| mov x20, x0 | |
| // single-char operators: op_table holds (char, code+32) byte pairs, zero-terminated | |
| adr x10, op_table | |
| 1: ldrb w11, [x10] | |
| cbz w11, 5f | |
| cmp w9, w11 | |
| ldrb w21, [x10, #1] // these three do not touch the flags from cmp | |
| sub x21, x21, #32 | |
| add x10, x10, #2 | |
| b.ne 1b | |
| b pe_check_prec | |
| 5: cmp w9, #'<' | |
| b.eq pe_shift | |
| cmp w9, #'>' | |
| b.ne pe_done | |
| pe_shift: | |
| // two-char operator: the next char must repeat the first ("<<" or ">>") | |
| ldrb w10, [x20, #1] | |
| cmp w10, w9 | |
| b.ne pe_done | |
| // x20 is deliberately NOT advanced here: if the precedence check below | |
| // rejects the operator, the caller must see the whole "<<" / ">>" again | |
| lsr w21, w9, #1 // '<' (60) → 30, '>' (62) → 31 | |
| add x21, x21, #55 // → 0x55 / 0x56 | |
| pe_check_prec: | |
| lsr x9, x21, #4 // prec = x21 >> 4 | |
| and x23, x21, #0xF // opcode = x21 & 0xF (callee-saved) | |
| cmp x9, x22 // op_prec vs min_prec | |
| b.lt pe_done // op_prec < min_prec: not ours, position left at the operator | |
| cmp x9, #5 // precedence 5 = shift, the only two-char operators | |
| csinc x20, x20, x20, ne // shift: skip the first operator char | |
| add x20, x20, #1 // skip operator char | |
| mov x0, x20 | |
| add x1, x9, #1 // recurse with prec+1 | |
| bl parse_expr | |
| mov x20, x1 // update position | |
| // computed branch: each pe_ops entry is two instructions (8 bytes) | |
| adr x9, pe_ops | |
| add x9, x9, x23, lsl #3 | |
| br x9 | |
| pe_ops: | |
| orr x19, x19, x0 // opcode 0: | | |
| b pe_loop | |
| and x19, x19, x0 // opcode 1: & | |
| b pe_loop | |
| add x19, x19, x0 // opcode 2: + | |
| b pe_loop | |
| sub x19, x19, x0 // opcode 3: - | |
| b pe_loop | |
| mul x19, x19, x0 // opcode 4: * | |
| b pe_loop | |
| lsl x19, x19, x0 // opcode 5: << | |
| b pe_loop | |
| lsr x19, x19, x0 // opcode 6: >> | |
| b pe_loop | |
| pe_done: | |
| mov x0, x19 | |
| mov x1, x20 | |
| b pl_done // shared epilogue: restores x19-x23, x30 and pops the frame | |
| // ────────────────────────────────────────────────────────────────────────── | |
| // parse_expr_unary — handles '~', unary '-', then falls through to atom; x0 = pointer → x0 = value, x1 = pointer past; all exits go through pea_ret / pea_ret_x20, which pop the 16-byte frame | |
| // ────────────────────────────────────────────────────────────────────────── | |
| parse_expr_unary: | |
| stp x30, x20, [sp, #-16]! | |
| bl skip_ws | |
| cmp w9, #'~' | |
| b.eq pe_unary_not | |
| cmp w9, #'-' | |
| b.eq pe_unary_neg | |
| // not unary, fall through to parse atom (skip_ws already done, w9 = current char) | |
| // '(' — grouped expression | |
| cmp w9, #'(' | |
| b.eq pe_atom_paren | |
| // '.' — current location counter | |
| cmp w9, #'.' | |
| b.eq pe_atom_dot | |
| // digit or '\'' — literal handed to parse_int ('-' never reaches here: the unary path above took it) | |
| sub w10, w9, #'0' | |
| cmp w10, #9 | |
| b.ls pe_atom_num | |
| cmp w9, #'\'' | |
| b.eq pe_atom_num | |
| // identifier — symbol reference | |
| bl parse_ident | |
| cbz x1, pe_atom_err | |
| mov x20, x2 // end pointer (return this) | |
| // look up symbol (x1 == 0 on return is treated as "not found") | |
| bl sym_lookup | |
| cbz x1, pe_atom_undef | |
| // return value | |
| ldr x0, [x0, #SYM_VALUE] | |
| b pea_ret_x20 | |
| pe_atom_undef: | |
| // in pass 1, undefined symbols get 0 (forward ref in instruction) | |
| ldr x9, [x28, #ST_PASS] | |
| tbz x9, #1, 1f | |
| // pass 2: error — NOTE(review): error_at presumably does not return; if it did, execution would continue at 1: below. err_undef is also branched to from parse_label_ref | |
| err_undef: | |
| adr x0, msg_undef | |
| bl error_at | |
| 1: mov x0, #0 | |
| b pea_ret_x20 | |
| pe_atom_paren: | |
| add x0, x0, #1 // skip '(' | |
| bl parse_expr0 | |
| mov x20, x0 // value | |
| bl ws_x1 | |
| cmp w9, #')' | |
| b.ne pe_atom_err | |
| add x1, x0, #1 // pointer past ')' | |
| mov x0, x20 | |
| b pea_ret | |
| pe_atom_dot: | |
| mov x20, x0 // save pointer to '.' | |
| ldr x11, [x28, #ST_CUR_SEC] | |
| ldr x0, [x28, x11] // section offset (pass 1 returns this unrebased value) | |
| ldr x10, [x28, #ST_PASS] | |
| tbz x10, #1, 1f | |
| // pass 2: add section base (x11 = sec*8) | |
| add x11, x28, x11 | |
| ldr x11, [x11, #ST_TEXT_BASE] | |
| add x0, x0, x11 | |
| 1: add x1, x20, #1 // pointer past '.' | |
| b pea_ret | |
| pe_atom_num: | |
| bl parse_int | |
| b pea_ret | |
| pea_ret_x20: | |
| mov x1, x20 | |
| pea_ret: | |
| ldp x30, x20, [sp], #16 | |
| ret | |
| pe_atom_err: | |
| adr x0, msg_syntax | |
| bl error_at | |
| pe_unary_not: | |
| add x0, x0, #1 | |
| bl parse_expr_unary // recursive: unary operators bind tighter than any binary operator | |
| mvn x0, x0 | |
| b pea_ret | |
| pe_unary_neg: | |
| add x0, x0, #1 | |
| bl parse_expr_unary // recursive | |
| sub x0, xzr, x0 // neg | |
| b pea_ret | |
| // ══════════════════════════════════════════════════════════════════════════ | |
| // String parsing | |
| // ══════════════════════════════════════════════════════════════════════════ | |
| // ────────────────────────────────────────────────────────────────────────── | |
| // parse_string — parse a quoted string, decode backslash escapes via decode_escape, and copy the bytes to dest | |
| // x0 = pointer (at the opening '"') | |
| // x1 = destination buffer (required: every byte is stored unconditionally, so NULL is not supported) | |
| // returns x0 = byte count, x1 = pointer past closing '"' (or past the NUL if the string is unterminated); return address is held in x16; clobbers x9, x14-x16 | |
| // ────────────────────────────────────────────────────────────────────────── | |
| parse_string: | |
| mov x16, x30 | |
| mov x15, x1 // dest | |
| add x0, x0, #1 // skip opening '"' | |
| mov x14, #0 // byte count | |
| ps_loop: | |
| ldrb w9, [x0], #1 // load + advance | |
| cbz w9, ps_done // unterminated string: stop at NUL, no error is reported | |
| cmp w9, #'"' | |
| b.eq ps_done // closing quote (x0 already past it) | |
| cmp w9, #'\\' | |
| b.eq ps_escape | |
| // plain character — x0 already advanced by post-increment | |
| ps_store: | |
| strb w9, [x15, x14] | |
| add x14, x14, #1 | |
| b ps_loop | |
| ps_escape: | |
| ldrb w9, [x0], #1 // load escape char, advance (past backslash); decode_escape is expected to leave the decoded byte in w9 — TODO confirm | |
| bl decode_escape | |
| b ps_store | |
| ps_done: | |
| mov x1, x0 | |
| mov x0, x14 // byte count | |
| br x16 | |
| // ══════════════════════════════════════════════════════════════════════════ | |
| // Pass 2 infrastructure | |
| // ══════════════════════════════════════════════════════════════════════════ | |
| // ────────────────────────────────────────────────────────────────────────── | |
| // emit_inst_done — store the instruction word at x27 + ST_TEXT_POS, advance ST_TEXT_POS by 4, then leave through the shared epilogue at pl_done | |
| // x0 = instruction word; reached via 'b' from within encode_instruction, and from pl_instruction in pass 1 (where the stored word is overwritten in pass 2) | |
| // ────────────────────────────────────────────────────────────────────────── | |
| // emit_with_sf — OR the sf bit from w23 into bit 31 of w0, then emit (clobbers w24); emit_with_sf24 does the same with the sf bit already in w24 | |
| emit_with_sf: | |
| mov w24, w23 | |
| emit_with_sf24: | |
| orr w0, w0, w24, lsl #31 | |
| emit_inst_done: | |
| ldr x9, [x28, #ST_TEXT_POS] | |
| str w0, [x27, x9] | |
| add x9, x9, #4 | |
| str x9, [x28, #ST_TEXT_POS] | |
| b pl_done | |
| // ────────────────────────────────────────────────────────────────────────── | |
| // parse_label_pc_rel — parse label ref (x0 = pointer, passed straight to parse_label_ref) then compute PC-relative offset | |
| // uses [sp, #48] for return address — a spare slot in the caller's 64-byte frame, shared with parse_x23_ws and parse_hash_imm, so these must not be nested | |
| // returns x0 = signed offset in instruction units: (target - (ST_TEXT_BASE + ST_TEXT_POS)) >> 2, arithmetic shift | |
| // ────────────────────────────────────────────────────────────────────────── | |
| parse_label_pc_rel: | |
| str x30, [sp, #48] | |
| bl parse_label_ref | |
| ldr x9, [x28, #ST_TEXT_BASE] | |
| ldr x10, [x28, #ST_TEXT_POS] | |
| add x9, x9, x10 | |
| sub x0, x0, x9 | |
| asr x0, x0, #2 | |
| ldr x30, [sp, #48] | |
| ret | |
| // parse_x23_ws — parse first register (via ws_x21_parse_reg) into x23, then tail-call ws_x2_skip1 to skip comma+ws | |
| // x1 preserved (sf/is_64bit from parse_register) — this relies on ws_x2_skip1 not touching x1 | |
| // uses [sp, #48] for return address (spare slot in the caller's frame) | |
| parse_x23_ws: | |
| str x30, [sp, #48] | |
| bl ws_x21_parse_reg | |
| mov x23, x0 | |
| ldr x30, [sp, #48] | |
| b ws_x2_skip1 | |
| // ────────────────────────────────────────────────────────────────────────── | |
| // parse_2reg — parse "Rd, Rn" from operands (x21) | |
| // returns x22 = Rd, x23 = sf, x0 = Rn (Rn comes from the parse_register tail call at p23_tail, shared with parse_3reg) | |
| // NOTE: uses [sp, #56] for return address; called from encode_instruction (the nested parse_x23_ws uses [sp, #48], so the two slots do not collide) | |
| // ────────────────────────────────────────────────────────────────────────── | |
| parse_2reg: | |
| str x30, [sp, #56] | |
| bl parse_x23_ws | |
| mov x22, x23 // Rd | |
| mov x23, x1 // sf | |
| b p23_tail | |
| // ────────────────────────────────────────────────────────────────────────── | |
| // parse_3reg — parse "Rd, Rn, Rm" from operands (x21) | |
| // returns x22 = Rd, x23 = sf, x24 = Rn, x0 = Rm (sf is taken from the first register only) | |
| // NOTE: uses [sp, #56] for return address; called from encode_instruction | |
| // ────────────────────────────────────────────────────────────────────────── | |
| parse_3reg: | |
| str x30, [sp, #56] | |
| bl parse_x23_ws | |
| mov x22, x23 // Rd | |
| mov x23, x1 // sf | |
| bl parse_register | |
| mov x24, x0 // Rn | |
| bl ws_x2_skip1 // skip ',' | |
| p23_tail: | |
| ldr x30, [sp, #56] | |
| b parse_register | |
| // skip_lsl — advance x0 to the '#' of a trailing ", lsl #N" modifier, then | |
| // fall through into parse_hash_imm to evaluate N. | |
| // x0 = pointer just before the modifier text (scanning starts at x0 + 1) | |
| // The scan is bounded: reaching a NUL or a newline before any '#' is reported | |
| // as a syntax error instead of running on into the following lines. | |
| skip_lsl: | |
| 1: ldrb w9, [x0, #1]! | |
| cbz w9, pe_atom_err // end of buffer before any '#' | |
| cmp w9, #10 // newline: the modifier on this line has no '#' | |
| b.eq pe_atom_err | |
| cmp w9, #'#' | |
| b.ne 1b | |
| // falls through to parse_hash_imm | |
| // ────────────────────────────────────────────────────────────────────────── | |
| // parse_hash_imm — parse #expr or #:lo12:expr (the leading '#' is optional) | |
| // x0 = pointer (at '#', or directly at the expression / ':lo12:') | |
| // returns x0 = value (masked to the low 12 bits for :lo12:), x1 = pointer past | |
| // uses [sp, #48] for return address (spare slot in the caller's frame) | |
| // ────────────────────────────────────────────────────────────────────────── | |
| parse_hash_imm: | |
| str x30, [sp, #48] | |
| // check first char: '#' or ':' | |
| ldrb w9, [x0] | |
| cmp w9, #'#' | |
| csinc x0, x0, x0, ne // skip '#' if found | |
| ldrb w9, [x0] | |
| cmp w9, #':' | |
| b.ne phi_plain | |
| ldrb w10, [x0, #1] | |
| cmp w10, #'l' | |
| b.ne phi_plain | |
| // :lo12: — skip 6 chars (only the leading ":l" is actually checked) | |
| add x0, x0, #6 | |
| bl parse_expr0 | |
| and x0, x0, #0xFFF | |
| b phi_ret | |
| phi_plain: | |
| bl parse_expr0 | |
| phi_ret: | |
| ldr x30, [sp, #48] | |
| ret | |
| // ────────────────────────────────────────────────────────────────────────── | |
| // parse_label_ref — parse branch target (named label or Nf/Nb); exits through parse_expr_unary's pea_ret_x20 epilogue (same 16-byte frame) and reports failures via err_undef | |
| // x0 = pointer | |
| // returns x0 = target address, x1 = pointer past | |
| // ────────────────────────────────────────────────────────────────────────── | |
| parse_label_ref: | |
| stp x30, x20, [sp, #-16]! | |
| bl skip_ws | |
| // numeric label ref? digit followed by 'f' or 'b' (anything else is treated as a named label) | |
| sub w10, w9, #'0' | |
| cmp w10, #9 | |
| b.hi plr_named | |
| ldrb w11, [x0, #1] | |
| cmp w11, #'b' | |
| cset x1, eq // x1=1 backward, 0 forward | |
| b.eq plr_numlab_common | |
| cmp w11, #'f' | |
| b.ne plr_named | |
| plr_numlab_common: | |
| add x20, x0, #2 // pointer past "Nf"/"Nb" | |
| mov x0, x10 // digit | |
| // inlined rnl_entry: the cursor is the number of definitions of this digit passed so far (see handle_numlab_p2) | |
| add x9, x28, #NUMLAB_CURS_OFF | |
| ldr x10, [x9, x0, lsl #3] // cursor | |
| sub x10, x10, x1 // backward: cursor-1, forward: cursor (not range-checked: an "Nb" with no earlier "N:" or an "Nf" with no later one reads a slot that was never recorded for it) | |
| adrp x11, numlab_defs | |
| add x11, x11, :lo12:numlab_defs | |
| lsl x12, x0, #7 // digit * 128 | |
| add x12, x12, x10 // + cursor | |
| ldr x0, [x11, x12, lsl #3] | |
| b pea_ret_x20 | |
| plr_named: | |
| bl parse_ident | |
| cbz x1, err_undef | |
| mov x20, x2 // save end pointer | |
| bl sym_lookup | |
| cbz x1, err_undef | |
| ldr x0, [x0, #SYM_VALUE] | |
| b pea_ret_x20 | |
| // ────────────────────────────────────────────────────────────────────────── | |
| // encode_logical_imm — encode bitmask immediate for logical instructions | |
| // x0 = value, x1 = is_32bit (1=replicate low 32 to full 64) | |
| // returns x0 = (N << 12) | (immr << 6) | imms; an unencodable value does not return — it branches to ei_logical_bad | |
| // ────────────────────────────────────────────────────────────────────────── | |
| encode_logical_imm: | |
| // leaf function — no frame needed, uses scratch registers only (x9-x16) | |
| // for 32-bit, replicate low 32 bits | |
| cbz x1, eli_start | |
| and x0, x0, #0xFFFFFFFF | |
| orr x0, x0, x0, lsl #32 | |
| eli_start: | |
| mov x13, x0 // val | |
| // reject all-zeros and all-ones | |
| cbz x13, ei_logical_bad | |
| mvn x9, x13 | |
| cbz x9, ei_logical_bad | |
| // rotation = ctz(val & (val + 1)) — val & (val+1) clears the run of ones at bit 0, if any | |
| add x9, x13, #1 | |
| and x9, x13, x9 | |
| rbit x10, x9 | |
| clz x14, x10 // rotation (64 when val is a single run starting at bit 0; ror takes the amount mod 64) | |
| // normalized = ror(val, rotation) | |
| ror x9, x13, x14 | |
| // zeroes = clz(normalized) | |
| clz x10, x9 | |
| // ones = ctz(~normalized) = clz(rbit(~normalized)) | |
| mvn x11, x9 | |
| rbit x11, x11 | |
| clz x15, x11 // ones | |
| // size = zeroes + ones (candidate element size) | |
| add x16, x10, x15 | |
| // validate: ror(val, size) == val, i.e. val really repeats with that period | |
| ror x9, x13, x16 | |
| cmp x9, x13 | |
| b.ne ei_logical_bad | |
| // immr = (-rotation) & (size - 1) | |
| neg x9, x14 | |
| sub x10, x16, #1 | |
| and x9, x9, x10 // immr | |
| // imms = (-(size << 1) | (ones - 1)) & 0x3F | |
| sub x11, xzr, x16, lsl #1 | |
| sub x12, x15, #1 | |
| orr x11, x11, x12 | |
| and x11, x11, #0x3F // imms | |
| // result = (N << 12) | (immr << 6) | imms where N = size >> 6 (1 only for a 64-bit element) | |
| lsr x12, x16, #6 | |
| orr x0, x11, x9, lsl #6 | |
| orr x0, x0, x12, lsl #12 | |
| ret | |
| // ────────────────────────────────────────────────────────────────────────── | |
| // parse_cond — parse condition code (eq, ne, lt, ge, hi, ls, etc.) | |
| // x0 = pointer (at first char of condition) | |
| // returns x0 = pointer past the two letters, x1 = the byte read from cond_xor_tbl (the condition code); clobbers x9-x11 | |
| // Looks the code up in cond_xor_tbl, indexed by (char0 ^ char1) & 0x1F — a 5-bit hash of the two letters. There is no validation: an unrecognised pair returns whatever the table holds at that index | |
| // ────────────────────────────────────────────────────────────────────────── | |
| parse_cond: | |
| ldrh w9, [x0] | |
| add x0, x0, #2 | |
| lsr w10, w9, #8 // char1 | |
| eor w10, w10, w9 // char0 ^ char1 (in the low byte; higher bits are masked off next) | |
| and w10, w10, #0x1F // 5-bit index | |
| adr x11, cond_xor_tbl | |
| ldrb w1, [x11, x10] | |
| ret | |
| parse_reg_fail: | |
| mov x0, #-1 | |
| ret | |
| // ────────────────────────────────────────────────────────────────────────── | |
| // encode_instruction — dispatch mnemonic, parse operands, emit | |
| // x0 = mnemonic start, x1 = mnemonic length, x2 = operands start | |
| // ────────────────────────────────────────────────────────────────────────── | |
| encode_instruction: | |
| // x19 already equals x0 (set by process_line before parse_ident) | |
| mov x20, x1 | |
| mov x21, x2 | |
| // dispatch on first character of mnemonic | |
| ldrb w9, [x19] | |
| ldrb w10, [x19, #1] | |
| cmp w9, #'a' | |
| b.eq ei_a | |
| cmp w9, #'b' | |
| b.eq ei_b | |
| cmp w9, #'c' | |
| b.eq ei_c | |
| cmp w9, #'e' | |
| mov x22, #2 // eor opc (doesn't affect flags) | |
| b.eq ei_logical | |
| cmp w9, #'l' | |
| b.eq ei_l | |
| cmp w9, #'m' | |
| b.eq ei_m | |
| cmp w9, #'n' | |
| b.eq ei_n | |
| cmp w9, #'o' | |
| mov x22, #1 // orr opc (doesn't affect flags) | |
| b.eq ei_logical | |
| cmp w9, #'r' | |
| b.eq ei_r | |
| cmp w9, #'s' | |
| b.eq ei_s | |
| cmp w9, #'t' | |
| b.eq ei_t | |
| cmp w9, #'u' | |
| b.ne ei_bad | |
| // udiv Rd, Rn, Rm / ubfx / ubfm / ubfiz / uxtb / uxth | |
| ei_u: | |
| cmp w10, #'b' | |
| b.eq ei_bfm_unified | |
| cmp w10, #'x' | |
| b.eq ei_sxt_uxt | |
| ei_udiv: | |
| mov w25, #0 | |
| ei_div_common: | |
| bl parse_3reg | |
| orr w9, w25, #0x0800 | |
| b emit_3reg_1AC0_tail | |
| // ── 'a' mnemonics: add/adds, adr/adrp, and/ands, asr (any other second char falls to asr) ── | |
| ei_a: | |
| cmp w10, #'d' | |
| b.eq ei_a_d | |
| cmp w10, #'n' | |
| movz w25, #0x2800 // ASRV opcode (speculative, harmless if AND) | |
| b.ne ei_shift_common | |
| sub x22, x20, #3 // len=3→0 (AND), len=4→1 | |
| add x22, x22, x22, lsl #1 // 0→0, 1→3 (ANDS opc) | |
| b ei_logical | |
| // sxtb/sxth/sxtw/uxtb/uxth Rd, Rn — SBFM/UBFM Rd, Rn, #0, #imms | |
| ei_sxt_uxt: | |
| bl parse_2reg | |
| mov x24, x0 | |
| mov x10, #0 // immr = 0 | |
| ldrb w9, [x19, #3] // suffix: 'b', 'h', or 'w' | |
| ubfx w11, w9, #3, #2 // 'b'→0, 'h'→1, 'w'→2 | |
| mov w12, #8 | |
| lsl w11, w12, w11 // 8, 16, 32 | |
| sub w11, w11, #1 // 7, 15, 31 | |
| ldrb w9, [x19] // 's' or 'u' | |
| cmp w9, #'s' | |
| b.ne ei_ubfm_emit // uxt → UBFM path (sxt falls through) | |
| ei_asr_sbfm: | |
| movz w0, #0x1300, lsl #16 // 32-bit SBFM base (sf+N applied later) | |
| b ei_bfm_apply_n_sf | |
| ei_a_d: | |
| ldrb w10, [x19, #2] | |
| cmp w10, #'d' | |
| b.eq ei_add | |
| cmp w10, #'r' | |
| b.ne ei_bad | |
| // adr/adrp shared: parse Rd, skip comma, precompute PC | |
| bl ws_x21_parse_reg | |
| mov x22, x0 // Rd | |
| bl ws_x2_skip1 // skip ',' | |
| ldr x9, [x28, #ST_TEXT_BASE] | |
| ldr x10, [x28, #ST_TEXT_POS] | |
| add x25, x9, x10 // x25 = PC | |
| cmp x20, #3 | |
| b.eq ei_adr_body | |
| // adrp: page-relative offset | |
| bl parse_label_ref | |
| and x23, x0, #~0xFFF | |
| and x9, x25, #~0xFFF | |
| sub x23, x23, x9 | |
| asr x23, x23, #12 | |
| b ei_adr_encode | |
| ei_adr_body: | |
| bl parse_expr0 | |
| sub x23, x0, x25 // imm21 = target - PC | |
| ei_adr_encode: | |
| // encoding: immlo = imm21[1:0], immhi = imm21[20:2] | |
| and w9, w23, #3 // immlo | |
| ubfx w10, w23, #2, #19 // immhi (19 bits) | |
| sub w23, w20, #3 // sf: 0=ADR(len3), 1=ADRP(len4) | |
| movz w0, #0x1000, lsl #16 // ADR base opcode | |
| orr w0, w0, w22 | |
| orr w0, w0, w9, lsl #29 | |
| orr w0, w0, w10, lsl #5 | |
| b emit_with_sf | |
| // ── 'b' mnemonics: b, bl, b.cond, br, blr, bic, bfm, bfi, bfxil ──────── | |
| ei_b: | |
| cmp w10, #'f' | |
| b.eq ei_bfm_unified | |
| cmp x20, #3 | |
| b.eq ei_b3 | |
| b.hi ei_bad | |
| cmp w10, #'r' | |
| b.eq ei_br | |
| // b (len=1) or bl (len=2): bit 31 = len-1 | |
| sub x9, x20, #1 | |
| movz w22, #0x1400, lsl #16 | |
| orr w22, w22, w9, lsl #31 | |
| bl ws_x21 | |
| cmp w9, #'.' | |
| b.eq ei_bcond | |
| // B/BL: parse label, compute pc-relative offset | |
| bl parse_label_pc_rel | |
| and w0, w0, #0x3FFFFFF | |
| orr w0, w0, w22 | |
| b emit_inst_done | |
| // 3-char 'b' mnemonics: blr or bic | |
| ei_b3: | |
| ldrb w9, [x19, #2] | |
| cmp w9, #'r' | |
| b.eq ei_blr | |
| // bic Rd, Rn, Rm — AND Rd, Rn, ~Rm | |
| // sf 00 01010 sh 1 Rm imm6 Rn Rd | |
| ei_bic: | |
| bl parse_3reg | |
| movz w9, #0x0A20, lsl #16 // 32-bit BIC | |
| b emit_3reg_sf_tail | |
| // br Xn / blr Xn — branch (with link) to register | |
| // br: x20=2 (len), blr: x20=3 → sub 2 gives 0 or 1 for bit 21 | |
| ei_br: | |
| ei_blr: | |
| bl ws_x21_parse_reg | |
| sub w10, w20, #2 | |
| movz w9, #0xD61F, lsl #16 | |
| orr w9, w9, w10, lsl #21 // blr: set bit 21 → 0xD63F | |
| orr w0, w9, w0, lsl #5 | |
| b emit_inst_done | |
| // ── 'c' mnemonics: cmp, cmn, cbz, cbnz, clz, csel, csinc, cset ────────── | |
| ei_c: | |
| cmp w10, #'m' | |
| b.eq ei_c_cm | |
| cmp w10, #'b' | |
| b.ne 2f | |
| ldrb w10, [x19, #2] | |
| cmp w10, #'z' | |
| cset x22, ne // x22=0 for cbz, 1 for cbnz | |
| b.eq ei_cbz_common | |
| cmp w10, #'n' | |
| b.ne ei_bad | |
| ei_cbz_common: | |
| bl parse_x23_ws | |
| mov x24, x1 // sf | |
| bl parse_label_pc_rel | |
| and w0, w0, #0x7FFFF | |
| orr w0, w23, w0, lsl #5 | |
| orr w0, w0, w22, lsl #24 | |
| movz w9, #0x3400, lsl #16 | |
| b ei_addsub_sf_emit | |
| 2: cmp w10, #'l' | |
| b.eq ei_clz | |
| ldrb w10, [x19, #3] | |
| cmp w10, #'n' | |
| movz w26, #0x0400 // CSINC bit (speculative) | |
| b.eq ei_csel_common | |
| cmp w10, #'l' | |
| movz w26, #0 // CSEL: no extra bits (speculative) | |
| b.eq ei_csel_common | |
| // cset Rd, cond — alias for CSINC Rd, xzr, xzr, invert(cond) | |
| // encoding: 0x9A9F0000 | (inv_cond << 12) | 0x07E0 | Rd | |
| ei_cset: | |
| bl ws_x21_parse_reg // x0 = Rd, x2 = pointer past | |
| mov x22, x0 // Rd | |
| bl ws_x2_skip1 // skip ',' | |
| bl parse_cond // x1 = cond code | |
| eor w1, w1, #1 // invert condition (flip bit 0) | |
| orr w0, w22, w1, lsl #12 // Rd | (inv_cond << 12) | |
| orr w0, w0, #0x7E0 // | Rn=xzr<<5, o2=1 | |
| movk w0, #0x9A9F, lsl #16 // | sf=1, opc, Rm=xzr | |
| b emit_inst_done | |
| // csel Rd, Rn, Rm, cond | |
| // encoding: sf 00 11010100 Rm cond 00 Rn Rd | |
| // 64-bit base: 0x9A800000 | |
| ei_csel_common: | |
| bl parse_3reg // x22=Rd, x23=sf, x24=Rn, x0=Rm | |
| mov x25, x0 // save Rm | |
| bl ws_x2_skip1 // skip ',' | |
| bl parse_cond // x1 = cond | |
| movz w9, #0x1A80, lsl #16 // CSEL 32-bit base | |
| orr w9, w9, w26 // | CSINC bit if set | |
| orr w9, w9, w1, lsl #12 // | (cond << 12) | |
| mov x0, x25 // Rm for emit_3reg_sf_tail | |
| b emit_3reg_sf_tail | |
| // ── 'l' mnemonics: ldr, ldrb, lsl, lsr ──────────────────────────────── | |
| ei_l: | |
| cmp w10, #'d' | |
| b.eq ei_ld | |
| // lsl/lsr — immediate (UBFM alias) or register (LSLV/LSRV) | |
| ei_ls_shift: | |
| ldrb w10, [x19, #2] | |
| movz w25, #0x2000 // LSLV | |
| cmp w10, #'r' | |
| b.ne ei_shift_common | |
| movz w25, #0x2400 // LSRV | |
| ei_shift_common: | |
| bl parse_2reg // x22=Rd, x23=sf, x0=Rn, x2=ptr past | |
| mov x24, x0 // Rn | |
| bl ws_x2_skip1 | |
| cmp w9, #'#' | |
| b.eq ei_shift_imm_dispatch | |
| // register form | |
| bl parse_register // Rm | |
| mov w9, w25 | |
| b emit_3reg_1AC0_tail | |
| // ── 'm' mnemonics: mov, movz, movn, movk, mul, msub, madd, mvn ──────── | |
| ei_m: | |
| cmp w10, #'o' | |
| b.eq ei_mo | |
| cmp w10, #'u' | |
| b.eq ei_mul | |
| cmp w10, #'s' | |
| movz x26, #0x8000 // MSUB bit15 (speculative) | |
| b.eq ei_madd_msub_common | |
| cmp w10, #'a' | |
| mov x26, #0 // MADD bit15 (speculative) | |
| b.eq ei_madd_msub_common | |
| // mvn Rd, Rm — alias for orn Rd, xzr, Rm | |
| ei_mvn: | |
| movz w25, #0x2A20, lsl #16 // 32-bit ORN base | |
| b ei_neg_mvn_common | |
| ei_mo: | |
| // mov (3 chars) vs movz/movn/movk (4 chars) | |
| cmp x20, #3 | |
| b.eq ei_mov | |
| ldrb w10, [x19, #3] | |
| cmp w10, #'z' | |
| movz w22, #0x5280, lsl #16 // MOVZ base (speculative) | |
| b.eq ei_movwide | |
| cmp w10, #'n' | |
| movz w22, #0x1280, lsl #16 // MOVN base (speculative) | |
| b.eq ei_movwide | |
| cmp w10, #'k' | |
| b.ne ei_bad | |
| movz w22, #0x7280, lsl #16 // MOVK base | |
| b ei_movwide | |
| // ── 's' mnemonics: sub/subs, str*, stp, svc, sbfm, sbfx, sbfiz, sxt*, sdiv (default) ── | |
| ei_s: | |
| cmp w10, #'u' | |
| b.eq ei_su | |
| cmp w10, #'t' | |
| b.eq ei_st | |
| cmp w10, #'v' | |
| b.eq ei_svc | |
| cmp w10, #'b' | |
| b.eq ei_bfm_unified | |
| cmp w10, #'x' | |
| b.eq ei_sxt_uxt | |
| ei_sd: | |
| mov w25, #0x400 | |
| b ei_div_common | |
| ei_bad: | |
| adr x0, msg_badins | |
| bl error_at | |
| ei_ret: | |
| movz w0, #0x03C0 | |
| movk w0, #0xD65F, lsl #16 | |
| b emit_inst_done | |
| ei_svc: | |
| bl ws_x21 | |
| bl parse_hash_imm // x0 = imm16 value | |
| and w9, w0, #0xFFFF | |
| movz w0, #0x0001 | |
| movk w0, #0xD400, lsl #16 // 0xD4000001 | |
| orr w0, w0, w9, lsl #5 | |
| b emit_inst_done | |
| ei_bcond: | |
| add x0, x0, #1 // skip '.' | |
| movz w22, #0x5400, lsl #16 // 0x54000000 | |
| bl parse_cond | |
| orr w22, w22, w1 // base | cond | |
| bl parse_label_pc_rel | |
| // 0x54000000 | (imm19 << 5) | cond | |
| and w0, w0, #0x7FFFF | |
| orr w0, w22, w0, lsl #5 | |
| b emit_inst_done | |
| // clz Rd, Rn — 64-bit: 0xDAC01000, 32-bit: 0x5AC01000 | |
| ei_clz: | |
| mov w25, #0x1000 | |
| b ei_clz_rbit_common | |
| // ── 'r' mnemonics: second char 'e' → ret, 'o' → ror, anything else → rbit ── rbit Rd, Rn — 64-bit: 0xDAC00000, 32-bit: 0x5AC00000 | |
| ei_r: | |
| cmp w10, #'e' | |
| b.eq ei_ret | |
| cmp w10, #'o' | |
| b.eq ei_ror | |
| ei_rbit: | |
| mov w25, #0 | |
| ei_clz_rbit_common: | |
| bl parse_2reg // x22=Rd, x23=sf, x0=Rn | |
| mov x24, x0 // Rn for emit_3reg_sf_tail | |
| movz w9, #0x5AC0, lsl #16 // 32-bit base | |
| orr w9, w9, w25 // opcode (0x1000 for clz, 0 for rbit) | |
| mov x0, #0 // no Rm field | |
| b emit_3reg_sf_tail | |
| // ror Rd, Rn, Rm — RORV: 0x1AC02C00 (32-bit) / 0x9AC02C00 (64-bit) | |
| ei_ror: | |
| bl parse_3reg | |
| movz w9, #0x2C00 | |
| b emit_3reg_1AC0_tail | |
| // add/adds Rd, Rn, #imm / Rm [, lsl #N] / :lo12:sym | |
| ei_add: | |
| mov x22, #0 // op=0 (ADD) | |
| b ei_addsub_s | |
| // sub/subs Rd, Rn, #imm / Rm | |
| ei_su: | |
| ei_sub: | |
| movz x22, #0x4000, lsl #16 // op=1 (SUB) | |
| ei_addsub_s: | |
| sub x9, x20, #3 // 0 for len=3, 1 for len=4 | |
| orr x22, x22, x9, lsl #29 // set S flag if len=4 | |
| ei_addsub: | |
| bl parse_x23_ws | |
| mov x24, x1 // sf | |
| bl parse_register | |
| mov x25, x0 // save Rn | |
| bl ws_x2_skip1 // skip ',' | |
| // is the third operand a register or immediate? | |
| ei_addsub_operand: | |
| cmp w9, #'a' | |
| b.lo ei_addsub_imm // '#' or ':lo12:' (both < 'a') | |
| // register form: add Rd, Rn, Rm [, lsl #N] | |
| bl parse_register | |
| mov x21, x0 // Rm | |
| bl ws_x2 | |
| // check for optional ", lsl #N" | |
| cmp w9, #',' | |
| mov x9, #0 // shift amount default 0 (doesn't affect flags) | |
| b.ne ei_addsub_reg_emit | |
| bl skip_lsl // skip ", lsl" + parse_hash_imm | |
| mov x9, x0 // shift amount | |
| ei_addsub_reg_emit: | |
| // sf op 0 01011 shift 0 Rm imm6 Rn Rd | |
| // shift = 00 (LSL) | |
| and w11, w9, #0x3F | |
| orr w0, w23, w25, lsl #5 // Rd | (Rn << 5) | |
| orr w0, w0, w11, lsl #10 // imm6 | |
| orr w0, w0, w21, lsl #16 // Rm | |
| orr w0, w0, w22 // op|S bits | |
| movz w9, #0x0B00, lsl #16 | |
| b ei_addsub_sf_emit | |
| ei_addsub_imm: | |
| // immediate form: #expr or #:lo12:expr | |
| bl parse_hash_imm // x0=val, x2=is_lo12 | |
| lsr x9, x0, #12 | |
| cbnz x9, ei_logical_bad // imm12 out of range (0-4095) | |
| // sf op 0 10001 shift imm12 Rn Rd | |
| orr w9, w23, w25, lsl #5 // Rd | (Rn << 5) | |
| orr w9, w9, w0, lsl #10 // imm12 (bits 12+ known zero) | |
| orr w9, w9, w22 // op|S bits | |
| movz w0, #0x1100, lsl #16 | |
| // shared tail: w9=opcode bits (0x0B00 or 0x1100 << 16), x24=sf, w0=partial insn | |
| ei_addsub_sf_emit: | |
| orr w0, w0, w9 | |
| b emit_with_sf24 | |
| // cmp/cmn Rn, #imm / cmp/cmn Rn, Rm — reuse addsub with Rd=xzr | |
| ei_c_cm: | |
| ldrb w10, [x19, #2] | |
| cmp w10, #'n' | |
| movz x22, #0x6000, lsl #16 // CMP: SUBS bits 30:29 = 11 | |
| b.ne 1f | |
| movz x22, #0x2000, lsl #16 // CMN: ADDS bits 30:29 = 01 | |
| 1: mov x23, #31 // Rd = xzr | |
| bl ws_x21_parse_reg // first operand = Rn | |
| mov x24, x1 // sf | |
| mov x25, x0 // save Rn | |
| bl ws_x2_skip1 // skip ',' | |
| b ei_addsub_operand | |
| // ei_logical — and/eor/orr: bitmask-immediate or (optionally LSL-shifted) register form. | |
| // In: w22 = opc value, placed in bits 30:29 below (set before entry; ei_tst passes 3). | |
| // Register roles once parsed: x23 = Rd, x24 = sf, x26 = Rn, x21 = Rm, w25 = shift amount. | |
| // ei_logical_operand is a second entry point (used by ei_tst) that expects | |
| // x22/x23/x24/x26 preloaded and w9 = the first character of the last operand. | |
| // Both forms finish through ei_addsub_sf_emit, which ORs w0|w9 and applies sf. | |
| ei_logical: | |
| bl parse_x23_ws | |
| mov x24, x1 // sf | |
| bl parse_register | |
| mov x26, x0 // Rn | |
| bl ws_x2_skip1 | |
| ei_logical_operand: | |
| cmp w9, #'#' | |
| b.eq ei_logical_imm | |
| // register form: sf opc 01010 sh 0 Rm imm6 Rn Rd | |
| bl parse_register | |
| mov x21, x0 // Rm | |
| bl ws_x2 | |
| mov w25, #0 // shift amount = 0 default | |
| cmp w9, #',' | |
| b.ne ei_logical_reg_emit | |
| bl skip_lsl // skip ", lsl" + parse_hash_imm | |
| mov w25, w0 // shift amount (ORed into imm6 below without a range check) | |
| ei_logical_reg_emit: | |
| orr w0, w23, w26, lsl #5 // Rd | (Rn << 5) | |
| orr w0, w0, w25, lsl #10 // imm6 (shift amount) | |
| // Rm goes to bits 20:16, opc to bits 30:29; the shift-type field stays 00 (LSL) | |
| orr w0, w0, w21, lsl #16 | |
| orr w0, w0, w22, lsl #29 | |
| movz w9, #0x0A00, lsl #16 | |
| b ei_addsub_sf_emit | |
| ei_logical_imm: | |
| bl parse_hash_imm | |
| eor x1, x24, #1 // is_32bit = !sf | |
| bl encode_logical_imm | |
| // x0 = (N<<12)|(immr<<6)|imms | |
| orr w9, w23, w26, lsl #5 // Rd | (Rn << 5) | |
| orr w9, w9, w0, lsl #10 // | N/immr/imms (N lands in bit 22) | |
| orr w9, w9, w22, lsl #29 | |
| movz w0, #0x1200, lsl #16 // 100100 in bits 28:23 | |
| b ei_addsub_sf_emit // orr w0|w9, apply sf, emit | |
| // ei_tst — tst Rn, #imm | Rm: alias for ANDS XZR, Rn, operand. | |
| // Preloads the registers ei_logical_operand expects (x22 = opc, x23 = Rd, x24 = sf, | |
| // x26 = Rn) and reuses its immediate/register operand handling. | |
| ei_tst: | |
| mov x22, #3 // opc = ANDS | |
| mov x23, #31 // Rd = XZR | |
| bl ws_x21_parse_reg // parse Rn | |
| mov x24, x1 // sf | |
| mov x26, x0 // Rn | |
| bl ws_x2_skip1 // skip ',' | |
| b ei_logical_operand | |
| // ei_logical_bad — report msg_badimm through error_at. | |
| // Shared by several immediate paths in this file (add/sub imm12 check, mov immediate). | |
| // NOTE(review): nothing follows the bl, so this relies on error_at not returning; | |
| // if it did return, execution would fall into ei_ld below — confirm. | |
| ei_logical_bad: | |
| adr x0, msg_badimm | |
| bl error_at | |
| // ei_ld / ei_st — ldr/ldrb/ldrh/ldrs*/str/strb/strh/ldp/stp, multiple addressing modes. | |
| // In: w9 = first mnemonic character ('l' or 's'), x19 = mnemonic text, x20 = mnemonic length. | |
| // Register roles set up here for the addressing-mode code that follows: | |
| // x22 = opc (1 = load, 0 = store; replaced by ei_ldrs_size for sign-extending loads) | |
| // x24 = mnemonic length - 3 (0 for ldr/str, non-zero when a size suffix is present) | |
| // x23 = Rt (from parse_x23_ws), x21 = sf of Rt, w20 = size field (0..3) | |
| // Falls through into ei_ldst_bracket unless the operand is a literal-load label. | |
| ei_ld: | |
| ei_st: | |
| cmp w9, #'l' | |
| cset x22, eq // 1 for load ('l'), 0 for store ('s') | |
| ei_ldst_dispatch: | |
| ldrb w10, [x19, #2] | |
| cmp w10, #'p' | |
| b.eq ei_ldst_pair | |
| sub x24, x20, #3 // 0 for ldr/str (len=3), 1 for ldrb/strb/ldrh/strh (len=4) | |
| ei_ldst: | |
| bl parse_x23_ws | |
| mov x21, x1 // sf (size for non-byte) | |
| // precompute size encoding: 0=byte, 1=half, 2=32bit, 3=64bit | |
| add w20, w21, #2 // 2 or 3 | |
| cbz x24, 1f // len=3: use sf+2 | |
| ldrb w20, [x19, #3] // 'b'=0x62, 'h'=0x68, 's'=0x73 | |
| cmp w20, #'s' | |
| b.eq ei_ldrs_size // sign-extending load (ldrsb/ldrsh/ldrsw) | |
| ubfx w20, w20, #3, #2 // bits 4:3 of the suffix char: 0 for 'b' (byte), 1 for 'h' (half) | |
| 1: // literal load check: ldr Rt, label (no bracket) | |
| cbz x22, ei_ldst_bracket // store: must have [ | |
| cbnz x24, ei_ldst_bracket // ldrb/ldrh: must have [ | |
| cmp w9, #'[' | |
| b.ne ei_ldr_literal | |
| // ei_ldst_bracket — parse "[Rn ..." and select the addressing mode. | |
| // In: w20 = size, x22 = opc, x23 = Rt (consumed later by ldst_base). | |
| // Sets x25 = Rn. After Rn: ']' → base-only / post-index, ',' → offset forms. | |
| // Any other character is reported through pe_atom_err. | |
| ei_ldst_bracket: | |
| bl skip1_ws // skip '[' | |
| bl parse_register // Rn | |
| mov x25, x0 // save Rn | |
| bl ws_x2 | |
| cmp w9, #']' | |
| b.eq ei_ldst_base_only | |
| cmp w9, #',' | |
| b.ne pe_atom_err | |
| bl skip1_ws | |
| cmp w9, #'a' | |
| b.lo ei_ldst_uimm // '#' or ':lo12:' (both < 'a') | |
| // register offset: Rm [, lsl #N] | |
| bl parse_register | |
| mov x24, x0 // save Rm | |
| bl ws_x2 | |
| mov w10, #0 // S=0 | |
| cmp w9, #']' | |
| b.eq ei_ldst_reg_emit | |
| bl skip_lsl // skip ", lsl" + parse_hash_imm | |
| cbz x0, ei_ldst_reg_emit | |
| mov w10, #1 // S=1 (any non-zero shift amount; the value itself is not checked against the size) | |
| ei_ldst_reg_emit: | |
| bl ldst_base | |
| orr w0, w0, w10, lsl #12 // S bit | |
| orr w0, w0, w24, lsl #16 // Rm | |
| movz w9, #0x6800 // 0x800 | 0x6000 | |
| movk w9, #0x3820, lsl #16 // | 0x38000000 | 0x00200000 | |
| b orr_w9_emit | |
| // ei_ldst_base_only — "[Rn]" seen: either post-index ("], #imm") or a zero offset. | |
| ei_ldst_base_only: | |
| bl skip1_ws // skip ']' | |
| cmp w9, #',' | |
| b.eq ei_ldst_post | |
| mov x0, #0 | |
| b ei_ldst_uimm_encode | |
| // ei_ldst_uimm — "[Rn, #imm" (or :lo12:): immediate offset or pre-index. | |
| // Pre-index is recognised only when ']' and '!' directly follow the immediate | |
| // (the two bytes at x1 are inspected without skipping whitespace). | |
| ei_ldst_uimm: | |
| bl parse_hash_imm // x0=value, x1=ptr past imm | |
| // check for pre-index: [Rn, #simm9]! | |
| ldrb w9, [x1] | |
| cmp w9, #']' | |
| b.ne ei_ldst_uimm_encode | |
| ldrb w9, [x1, #1] | |
| cmp w9, #'!' | |
| b.ne ei_ldst_uimm_encode | |
| // pre-index encoding | |
| and w10, w0, #0x1FF | |
| bl ldst_base | |
| orr w0, w0, #0x00000C00 // pre-index: bits[11:10] = 11 | |
| b ei_ldst_simm9_tail | |
| // ei_ldst_uimm_encode — encode "[Rn, #imm]" / "[Rn]" for a single-register load/store. | |
| // In: x0 = byte offset, w20 = size (log2 of the access width), plus the ldst_base inputs. | |
| // A non-negative offset that is a multiple of the access size uses the scaled | |
| // unsigned-offset form (imm12 = offset >> size). Negative offsets, and offsets that | |
| // are not a multiple of the access size, cannot be expressed in that form and are | |
| // sent to the unscaled LDUR/STUR encoding instead of being silently truncated. | |
| // x0 is left untouched until the form is chosen because ei_ldst_unscaled reads it. | |
| ei_ldst_uimm_encode: | |
| tbnz x0, #63, ei_ldst_unscaled // negative → LDUR/STUR encoding | |
| lsr x9, x0, x20 // x9 = offset >> size (candidate imm12) | |
| lsl x10, x9, x20 | |
| cmp x10, x0 // did the shift drop any low bits? | |
| b.ne ei_ldst_unscaled // misaligned offset → LDUR/STUR encoding | |
| and w10, w9, #0xFFF | |
| bl ldst_base | |
| orr w0, w0, w10, lsl #10 // imm12 at [21:10] | |
| movz w9, #0x3900, lsl #16 // unsigned-offset load/store class | |
| b orr_w9_emit | |
| // ei_ldst_unscaled — LDUR/STUR form: signed 9-bit byte offset, bits[11:10] = 00. | |
| // In: w0 = offset (only the low 9 bits are kept; no range check is made here). | |
| ei_ldst_unscaled: | |
| and w10, w0, #0x1FF | |
| bl ldst_base | |
| b ei_ldst_simm9_tail // bits[11:10] = 00 (unscaled) | |
| // ei_ldst_post — "[Rn], #imm": post-index form. Entered with the text position at ','. | |
| ei_ldst_post: | |
| bl skip1_ws // skip ',' | |
| bl parse_hash_imm | |
| and w10, w0, #0x1FF | |
| bl ldst_base | |
| orr w0, w0, #0x00000400 // post-index: bits[11:10] = 01 | |
| // In: w0 = ldst_base result with the bits[11:10] mode already set, w10 = imm9. | |
| ei_ldst_simm9_tail: // shared by the pre-index, post-index and unscaled forms | |
| orr w0, w0, w10, lsl #12 // imm9 at [20:12] | |
| orr w0, w0, #0x38000000 | |
| b emit_inst_done | |
| // ei_ldrs_size — sign-extending load (ldrsb/ldrsh/ldrsw): derive size and opc from the | |
| // mnemonic suffix and the destination register width. | |
| // In: x21 = sf (from parse_x23_ws), x19 = mnemonic. | |
| // Out: w20 = size (0/1/2), w22 = opc (2 for an X destination, 3 for a W destination); | |
| // continues at ei_ldst_bracket, so a literal-label operand is not considered here. | |
| ei_ldrs_size: | |
| ldrb w9, [x19, #4] // 5th char: 'b','h','w' | |
| ubfx w20, w9, #3, #2 // 'b'→0, 'h'→1, 'w'→2 | |
| mov w22, #3 | |
| sub w22, w22, w21 // opc = 3 - sf (Xd→2, Wd→3) | |
| b ei_ldst_bracket | |
| // ei_ldr_literal — ldr Rt, label: PC-relative literal load (imm19 word offset). | |
| // In: x23 = Rt, x21 = sf, x0 = pointer to label (as expected by parse_label_pc_rel). | |
| // Only reached for plain "ldr" (a load with no size suffix) whose operand does not start with '['. | |
| ei_ldr_literal: | |
| bl parse_label_pc_rel // x0 = (target - PC) / 4 | |
| ubfiz w0, w0, #5, #19 // imm19 << 5 (offset truncated to 19 bits) | |
| orr w0, w0, w23 // Rt | |
| movz w9, #0x1800, lsl #16 // 32-bit base (0x18000000) | |
| orr w9, w9, w21, lsl #30 // sf=1 → 0x58000000 for 64-bit | |
| b orr_w9_emit | |
| // ei_ldst_pair — ldp/stp Rt, Rt2, [Rn{, #imm}]{!} and [Rn], #imm. | |
| // In: x22 = L (1 = ldp, 0 = stp), set on entry through ei_ld/ei_st. | |
| // Register roles: x23 = Rt, x21 = sf, x24 = Rt2, x25 = Rn, x20 = saved offset, | |
| // w26 = XOR mask turning the signed-offset base opcode into pre-/post-index. | |
| // The byte offset is divided by the register size (4 or 8) and masked to imm7; | |
| // neither its alignment nor its range is checked. | |
| ei_ldst_pair: | |
| // x22=L (already set by ei_ld/ei_st) | |
| bl parse_x23_ws | |
| mov x21, x1 // save sf (0=32-bit, 1=64-bit) | |
| bl parse_register // Rt2 | |
| mov x24, x0 // Rt2 | |
| bl ws_x2_skip1 // skip ',' | |
| bl skip1_ws // skip '[' | |
| bl parse_register // Rn | |
| mov x25, x0 // Rn | |
| mov x26, #0 // addressing mode: 0=signed-offset | |
| bl ws_x2 | |
| cmp w9, #']' | |
| b.eq ei_pair_close | |
| bl skip1_ws // skip ',' (assumed; the character is not verified) | |
| bl parse_hash_imm | |
| // x0=value, x1=ptr past imm | |
| // check for pre-index: ']' then '!' | |
| mov x20, x0 // save imm value | |
| bl ws_x1 // skip_ws from ptr past imm | |
| cmp w9, #']' | |
| b.ne ei_pair_pre_done | |
| bl skip1_ws | |
| cmp w9, #'!' | |
| b.ne ei_pair_pre_done | |
| movz w26, #0x0080, lsl #16 // pre-index: XOR sets bit 23 | |
| ei_pair_pre_done: | |
| mov x0, x20 // restore imm value | |
| b ei_pair_encode | |
| ei_pair_close: | |
| // saw ']' — check for post-index: ], #imm | |
| bl skip1_ws // skip ']' | |
| cmp w9, #',' | |
| b.ne 1f | |
| bl skip1_ws // skip ',' | |
| bl parse_hash_imm | |
| movz w26, #0x0180, lsl #16 // post-index: XOR flips bit24 off, bit23 on | |
| b ei_pair_encode | |
| 1: mov x0, #0 // base-only: offset=0 | |
| // In: x0 = byte offset, w26 = addressing-mode XOR mask (0 for signed offset). | |
| ei_pair_encode: | |
| add w10, w21, #2 // shift: 2 (32-bit) or 3 (64-bit) | |
| asr w0, w0, w10 | |
| and w0, w0, #0x7F // imm7 | |
| movz w9, #0x2900, lsl #16 // 32-bit STP/LDP base (signed offset) | |
| orr w9, w9, w21, lsl #31 // sf=1 → 0xA900 | |
| eor w9, w9, w26 // apply addressing mode bits | |
| // L at bit 22, imm7 at [21:15], Rt2 at [14:10], Rn at [9:5], Rt at [4:0] | |
| orr w9, w9, w22, lsl #22 | |
| orr w9, w9, w0, lsl #15 | |
| orr w9, w9, w24, lsl #10 | |
| orr w9, w9, w25, lsl #5 | |
| orr w0, w9, w23 | |
| b emit_inst_done | |
| // ei_madd_msub_common — madd/msub Rd, Rn, Rm, Ra: 0x1B000000 (32) / 0x9B000000 (64). | |
| // In: w26 = bit 15 of the encoding (0 or 0x8000), supplied by the dispatching code; | |
| // it must fit in the low 16 bits because the movk below replaces bits 31:16. | |
| ei_madd_msub_common: | |
| bl parse_3reg // x22=Rd, x23=sf, x24=Rn, x0=Rm | |
| mov x25, x0 // save Rm | |
| bl ws_x2_skip1 // skip ',' | |
| bl parse_register // Ra | |
| orr w9, w22, w0, lsl #10 // Rd | (Ra << 10) | |
| orr w9, w9, w24, lsl #5 // | (Rn << 5) | |
| orr w9, w9, w25, lsl #16 // | (Rm << 16) | |
| mov w0, w26 // bit15 (0 or 0x8000) | |
| movk w0, #0x1B00, lsl #16 // 32-bit base | |
| orr w0, w0, w23, lsl #31 // sf | |
| b orr_w9_emit | |
| // ei_mul — mul Rd, Rn, Rm: alias for MADD Rd, Rn, Rm, XZR (Ra field = 31 → 0x7C00). | |
| // 64-bit: 0x9B007C00 | (Rm<<16) | (Rn<<5) | Rd | |
| // 32-bit: 0x1B007C00 | ... | |
| // The register fields and the sf bit are merged by emit_3reg_sf_tail. | |
| ei_mul: | |
| bl parse_3reg | |
| movz w9, #0x7C00 | |
| movk w9, #0x1B00, lsl #16 // 32-bit base | |
| b emit_3reg_sf_tail | |
| // ei_shift_imm — lsl/lsr/asr Rd, Rn, #n, encoded through the UBFM/SBFM aliases. | |
| // In: x22 = Rd, x23 = sf, x24 = Rn (expected preloaded by the dispatching code), | |
| // x19 = mnemonic text, w25 = per-mnemonic opcode bits (bit 11 set marks ASR). | |
| // The shift amount must lie in 0..size-1 (size = 32 or 64). Anything else has no | |
| // encoding and used to corrupt the immr/imms fields, so it is rejected through | |
| // ei_logical_bad (msg_badimm). | |
| // ei_ubfm_emit / ei_bfm_apply_n_sf / ei_ubfm_orr are shared tails, also used by the | |
| // bitfield handler: w0 = base opcode, x10 = immr, x11 = imms. | |
| ei_shift_imm_dispatch: | |
| ei_shift_imm: | |
| bl parse_hash_imm | |
| mov x21, x0 // shift amount | |
| mov x11, #31 | |
| add x11, x11, x23, lsl #5 // size-1 = 31 or 63 (shared) | |
| cmp x11, x21 | |
| b.lo ei_logical_bad // n > size-1 (unsigned compare, so negative n too) | |
| ldrb w9, [x19, #2] | |
| cmp w9, #'r' // third char: 'r' for lsr/asr, 'l' for lsl | |
| b.eq ei_lsr_asr_imm | |
| // LSL #n: UBFM Rd, Rn, #(-n mod size), #(size-1-n) | |
| neg x10, x21 | |
| and x10, x10, x11 // immr = (-n) & (size-1) | |
| sub x11, x11, x21 // imms = (size-1) - n | |
| b ei_ubfm_emit | |
| ei_lsr_asr_imm: | |
| mov x10, x21 // immr = n; imms stays size-1 | |
| tbnz w25, #11, ei_asr_sbfm // bit 11 set in w25 = ASR (0x2800) | |
| ei_ubfm_emit: | |
| movz w0, #0x5300, lsl #16 // UBFM base (sf+N applied below) | |
| ei_bfm_apply_n_sf: | |
| orr w0, w0, w23, lsl #22 // N bit = sf | |
| ei_ubfm_orr: | |
| orr w0, w0, w22 // Rd | |
| orr w0, w0, w24, lsl #5 // Rn | |
| orr w0, w0, w11, lsl #10 // imms | |
| orr w0, w0, w10, lsl #16 // immr | |
| b emit_with_sf | |
| // ── unified bitfield handler (ubfx/ubfm/ubfiz/sbfx/sbfm/sbfiz/bfm/bfi/bfxil) | |
| // Syntax: <op> Rd, Rn, #op3, #op4. | |
| // The first mnemonic character picks the base opcode (s → SBFM, u → UBFM, else BFM). | |
| // The character after the "?bf"/"bf" prefix picks how op3/op4 map to immr/imms: | |
| // 'x' extract (…bfx, bfxil): immr = lsb, imms = lsb + width - 1 | |
| // 'm' raw (…bfm): immr = op3, imms = op4 | |
| // other insert (…bfiz, bfi): immr = (-lsb) mod size, imms = width - 1 | |
| // Exits through ei_bfm_apply_n_sf with w0 = base, x10 = immr, x11 = imms, | |
| // x22 = Rd, x23 = sf, x24 = Rn. The operand values are not range-checked here. | |
| ei_bfm_unified: | |
| bl parse_2reg // x22=Rd, x23=sf, x0=Rn, x2=ptr past | |
| mov x24, x0 // Rn | |
| bl ws_x2_skip1 // skip ',' | |
| bl parse_hash_imm // #op3 | |
| mov x25, x0 | |
| bl ws_x1 | |
| bl skip1_ws // skip ',' | |
| bl parse_hash_imm // #op4 | |
| mov x9, x0 // op4 in x9 | |
| // determine base opcode from mnemonic first char (x19 preserved) | |
| ldrb w10, [x19] | |
| mov w11, #3 // suffix offset for u*/s* prefix | |
| movz w0, #0x1300, lsl #16 // SBFM | |
| cmp w10, #'s' | |
| b.eq 1f | |
| movz w0, #0x5300, lsl #16 // UBFM | |
| cmp w10, #'u' | |
| b.eq 1f | |
| movz w0, #0x3300, lsl #16 // BFM | |
| mov w11, #2 // suffix offset for b* prefix | |
| 1: ldrb w11, [x19, x11] // load distinguishing char | |
| cmp w11, #'x' | |
| b.eq bfm_extract_apply | |
| cmp w11, #'m' | |
| b.eq bfm_raw_apply | |
| // insert: immr=(-lsb) mod size, imms=width-1 (fall-through from dispatch) | |
| bfm_insert_apply: | |
| sub x11, x9, #1 | |
| mov x10, #31 | |
| add x10, x10, x23, lsl #5 // size-1 = 31 or 63 | |
| neg x9, x25 | |
| and x10, x9, x10 | |
| b ei_bfm_apply_n_sf | |
| // extract: immr=lsb(x25), imms=lsb+width-1 (falls through to raw) | |
| bfm_extract_apply: | |
| add x9, x25, x9 | |
| sub x9, x9, #1 | |
| // raw: immr=x25, imms=x9 | |
| bfm_raw_apply: | |
| mov x10, x25 | |
| mov x11, x9 | |
| b ei_bfm_apply_n_sf | |
| // (bitfield handlers unified into ei_bfm_unified above) | |
| // ei_mov — mov Rd, Rm | SP | #imm. | |
| // Register form: ORR Rd, XZR, Rm, or ADD Rd, Rn, #0 when either register number is 31. | |
| // Immediate form: a single MOVZ, or failing that a single MOVN, found by trying each | |
| // 16-bit halfword position; anything else is rejected through ei_logical_bad. | |
| // Register roles: x22 = Rd, x23 = sf, x24 = immediate (inverted in the MOVN phase), | |
| // x25 = halfword shift (0/16/32/48), x26 = phase (0 = MOVZ, 1 = MOVN). | |
| // NOTE(review): register number 31 always selects the ADD (SP) form; if parse_register | |
| // also returns 31 for xzr/wzr, "mov Rd, xzr" would encode a move from SP — confirm. | |
| // NOTE(review): the halfword search runs over all 64 bits regardless of x23, so a W | |
| // destination is not limited to shifts 0/16 and 0xFFFFFFFF-style values are not | |
| // folded to a 32-bit MOVN — confirm whether callers rely on that. | |
| ei_mov: | |
| bl ws_x21_parse_reg | |
| mov x22, x0 // Rd | |
| mov x23, x1 // sf | |
| bl ws_x2_skip1 // skip ',' | |
| cmp w9, #'#' | |
| b.eq ei_mov_imm | |
| // register form | |
| bl parse_register | |
| // if either reg is 31, use ADD Rd, Rn, #0 (handles SP) | |
| cmp x22, #31 | |
| b.eq ei_mov_add | |
| cmp x0, #31 | |
| b.eq ei_mov_add | |
| // ORR Rd, XZR, Rm — x0 = Rm from parse_register | |
| movz w9, #0x03E0 | |
| movk w9, #0x2A00, lsl #16 | |
| orr w9, w9, w22 | |
| orr w0, w9, w0, lsl #16 | |
| b emit_with_sf // x23=sf: sets bit 31 if 64-bit | |
| ei_mov_add: | |
| // ADD Rd, Rn, #0 — x0 = Rm from parse_register | |
| orr w0, w22, w0, lsl #5 | |
| movk w0, #0x1100, lsl #16 | |
| b emit_with_sf // x23 = sf | |
| ei_mov_imm: | |
| bl parse_hash_imm | |
| mov x24, x0 | |
| mov x26, #0 // phase: 0=MOVZ, 1=MOVN | |
| ei_mov_try_phase: | |
| mov x25, #0 // hw shift counter (reset each phase) | |
| ei_mov_hw_loop: | |
| // the value fits one halfword at this shift iff re-placing that halfword reproduces it | |
| lsr x9, x24, x25 | |
| and x9, x9, #0xFFFF | |
| lsl x11, x9, x25 | |
| cmp x11, x24 | |
| b.eq ei_mov_found | |
| add x25, x25, #16 | |
| cmp x25, #64 | |
| b.lt ei_mov_hw_loop | |
| // try MOVN phase | |
| cbnz x26, ei_logical_bad | |
| mvn x24, x24 | |
| mov x26, #1 | |
| b ei_mov_try_phase | |
| ei_mov_found: | |
| // x9 = imm16, x25 = shift, x26 = phase (0=MOVZ, 1=MOVN) | |
| movz w0, #0x5280, lsl #16 // MOVZ base | |
| sub w0, w0, w26, lsl #30 // MOVN: subtract 0x40000000 (clear bit 30) | |
| orr w0, w0, w23, lsl #31 // sf bit | |
| orr w0, w0, w22 // Rd | |
| orr w0, w0, w9, lsl #5 // imm16 | |
| orr w0, w0, w25, lsl #17 // hw (shift_amount << 17 = hw << 21) | |
| b emit_inst_done | |
| // ei_movwide — movz/movn/movk Rd, #imm16 [, lsl #N]. | |
| // In: w22 = base opcode for the specific mnemonic (ORed in below; set before entry). | |
| // Register roles: x23 = Rd (from parse_x23_ws), x24 = sf, w25 = imm16, w10 = shift amount. | |
| // The immediate is masked to 16 bits and the shift amount is used as given (N << 17, | |
| // i.e. hw = N/16 at bit 21); neither is validated here. | |
| ei_movwide: | |
| bl parse_x23_ws | |
| mov x24, x1 // sf | |
| bl parse_hash_imm // #imm16 | |
| and w25, w0, #0xFFFF // imm16 (callee-saved) | |
| // check for optional ", lsl #N" | |
| bl ws_x1 | |
| cmp w9, #',' | |
| mov w10, #0 // hw = 0 default (doesn't affect flags) | |
| b.ne ei_movwide_emit | |
| bl skip_lsl // skip ", lsl" + parse_hash_imm | |
| mov w10, w0 // raw shift amount | |
| ei_movwide_emit: | |
| orr w0, w22, w24, lsl #31 // base | sf | |
| orr w0, w0, w23 // | Rd | |
| orr w0, w0, w25, lsl #5 // | imm16 | |
| orr w0, w0, w10, lsl #17 // | hw (shift<<17 = hw<<21) | |
| b emit_inst_done | |
| // ei_t — mnemonics starting with 't': tst, tbz, tbnz. | |
| // tbz/tbnz Rt, #bit, label — b5 011011 op b40 imm14 Rt | |
| // In: w10 is compared with 's' to route "tst" (presumably the second mnemonic | |
| // character — set by the dispatcher), x19 = mnemonic text, x20 = mnemonic length. | |
| // Register roles: x22 = op (0 = tbz, 1 = tbnz), x23 = Rt, x24 = bit number. | |
| // The branch offset is masked to 14 bits; the bit number is not range-checked. | |
| // orr_w9_emit is a shared tail used by other handlers: emits w0 | w9. | |
| ei_t: | |
| cmp w10, #'s' | |
| b.eq ei_tst | |
| ldrb w9, [x19, #2] | |
| cmp w9, #'z' | |
| b.eq 1f | |
| cmp w9, #'n' | |
| b.ne ei_bad | |
| 1: sub x22, x20, #3 // 0 for tbz (len=3), 1 for tbnz (len=4) | |
| ei_tbz_common: | |
| bl parse_x23_ws | |
| bl parse_hash_imm | |
| mov x24, x0 // bit number | |
| bl ws_x1 | |
| add x0, x0, #1 // skip ',' | |
| bl parse_label_pc_rel | |
| and w0, w0, #0x3FFF | |
| orr w0, w23, w0, lsl #5 | |
| // b40 = bit number[4:0] at bits 23:19, b5 = bit number[5] at bit 31, op at bit 24 | |
| bfi w0, w24, #19, #5 | |
| lsr w9, w24, #5 | |
| orr w0, w0, w9, lsl #31 | |
| orr w0, w0, w22, lsl #24 | |
| movz w9, #0x3600, lsl #16 | |
| orr_w9_emit: | |
| orr w0, w0, w9 | |
| b emit_inst_done | |
| // ei_n — mnemonics starting with 'n': nop, otherwise treated as neg. | |
| // neg Rd, Rm — alias for sub Rd, xzr, Rm | |
| // In: w10 is compared with 'o' to pick "nop" (presumably the second mnemonic character). | |
| // ei_neg_mvn_common is a shared entry: w25 = 32-bit base opcode of the underlying | |
| // three-register instruction; Rn is forced to xzr and emit_3reg_sf_tail finishes. | |
| ei_n: | |
| cmp w10, #'o' | |
| b.eq ei_nop | |
| ei_neg: | |
| movz w25, #0x4B00, lsl #16 // 32-bit SUB base | |
| ei_neg_mvn_common: | |
| bl parse_2reg // x22=Rd, x23=sf, x0=Rm | |
| mov x24, #31 // Rn = xzr | |
| mov w9, w25 | |
| b emit_3reg_sf_tail | |
| // nop — 0xD503201F | |
| ei_nop: | |
| movz w0, #0x201F | |
| movk w0, #0xD503, lsl #16 | |
| b emit_inst_done | |
| // ── shared emit tails ───────────────────────────────────────────────────── | |
| // Three entry points that fall through one into the next; all end in emit_inst_done. | |
| // emit_3reg_sf_tail: w9=32-bit base, x23=sf -> set bit31 if sf, then emit_3reg_tail | |
| // emit_3reg_tail: w9=base, x0=Rm, x22=Rd, x24=Rn -> emit and done | |
| // emit_3reg_1AC0_tail: w9=low opcode bits, x23=sf, x0=Rm, x22=Rd, x24=Rn | |
| // completes with 0x1AC0/0x9AC0 opcode and emits | |
| // (movk replaces bits 31:16 of w9, so only its low 16 bits survive) | |
| emit_3reg_1AC0_tail: | |
| movk w9, #0x1AC0, lsl #16 | |
| emit_3reg_sf_tail: | |
| orr w9, w9, w23, lsl #31 | |
| emit_3reg_tail: | |
| orr w9, w9, w22 | |
| orr w9, w9, w24, lsl #5 | |
| orr w0, w9, w0, lsl #16 | |
| b emit_inst_done | |
| // ldst_base — assemble the fields common to every single-register load/store. | |
| // In: w20 = size (→ bits 31:30), w22 = opc (→ bits 23:22), w23 = Rt, w25 = Rn | |
| // Out: w0 = size<<30 | opc<<22 | Rn<<5 | Rt. No other register is written, so | |
| // callers may keep an immediate in w10 across the call. | |
| ldst_base: | |
| lsl w0, w22, #22 // opc | |
| orr w0, w0, w20, lsl #30 // | size | |
| orr w0, w0, w25, lsl #5 // | Rn | |
| orr w0, w0, w23 // | Rt | |
| ret | |
| // ══════════════════════════════════════════════════════════════════════════ | |
| // Compression — two-tier dictionary encoder | |
| // ══════════════════════════════════════════════════════════════════════════ | |
| // ────────────────────────────────────────────────────────────────────────── | |
| // compress_text — encode text_buf word by word into input_buf | |
| // | |
| // In: x27 = text_buf, [x28, #ST_TEXT_POS] = text size in bytes, | |
| // x28 + INPUT_BUF_OFF = input_buf (reused as the output area) | |
| // Out: x0 = number of bytes written, including the terminating 0 byte | |
| // | |
| // Stream format, one record per 32-bit text word: | |
| // 0x01..FULL_DICT_ENTRIES 1-based full_dict index, no payload | |
| // 0x7F raw escape, followed by the 4-byte word | |
| // 0x80.. half_dict index + 0x80 (matches the upper 16 bits), | |
| // followed by the low 16 bits of the word | |
| // 0x00 end of stream | |
| // Leaf routine: makes no calls; scratch registers are x0-x7 and x10-x13. | |
| // ────────────────────────────────────────────────────────────────────────── | |
| compress_text: | |
| ldr x1, [x28, #ST_TEXT_POS] // text size | |
| add x2, x28, #INPUT_BUF_OFF // x2 = output cursor (input_buf) | |
| mov x12, x27 // x12 = read cursor | |
| add x13, x27, x1 // x13 = end of text | |
| adr x10, full_dict | |
| add x11, x10, #FULL_DICT_SIZE // half_dict sits right behind full_dict | |
| ct_loop: | |
| cmp x12, x13 | |
| b.hs ct_done | |
| ldr w3, [x12], #4 // w3 = next instruction word | |
| // tier 1: exact match against full_dict, code = 1-based index | |
| mov w5, #1 | |
| mov x4, x10 | |
| ct_full: | |
| ldr w6, [x4], #4 | |
| cmp w6, w3 | |
| b.eq ct_emit_full | |
| add w5, w5, #1 | |
| cmp w5, #(FULL_DICT_ENTRIES + 1) | |
| b.lo ct_full | |
| // tier 2: match the upper halfword against half_dict, code = 0x80 + index | |
| mov w5, #0x80 | |
| mov x4, x11 | |
| lsr w7, w3, #16 | |
| ct_half: | |
| ldrh w6, [x4], #2 | |
| cmp w6, w7 | |
| b.eq ct_emit_half | |
| add w5, w5, #1 | |
| cmp w5, #(0x80 + HALF_DICT_ENTRIES) | |
| b.lo ct_half | |
| // no match in either table: escape byte plus the whole word | |
| mov w5, #0x7F | |
| strb w5, [x2], #1 | |
| str w3, [x2], #4 | |
| b ct_loop | |
| ct_emit_half: | |
| strb w5, [x2], #1 // code byte | |
| strh w3, [x2], #2 // low 16 bits travel in the stream | |
| b ct_loop | |
| ct_emit_full: | |
| strb w5, [x2], #1 // code byte only | |
| b ct_loop | |
| ct_done: | |
| strb wzr, [x2], #1 // end marker | |
| sub x0, x2, #INPUT_BUF_OFF | |
| sub x0, x0, x28 // x0 = output cursor - input_buf | |
| ret | |
| // ── full instruction dictionary (126 entries) ────────────────── | |
| // generated by gen_dict.py — do not edit manually | |
| // Each entry is one complete 32-bit instruction word. compress_text emits the 1-based | |
| // entry index as a single byte when a text word matches exactly; the decompressor stub | |
| // indexes the table from (full_dict - 4) for the same reason. | |
| // half_dict must follow this table directly: both users locate it at | |
| // full_dict + FULL_DICT_SIZE. | |
| full_dict: | |
| .word 0xd65f03c0 | |
| .word 0x91000400 | |
| .word 0xaa0003f8 | |
| .word 0x540000a1 | |
| .word 0x39400009 | |
| .word 0xaa0003f4 | |
| .word 0xaa0003f9 | |
| .word 0x54000060 | |
| .word 0x7100b13f | |
| .word 0x7101b95f | |
| .word 0xaa0103f8 | |
| .word 0xaa1303e0 | |
| .word 0xd4000001 | |
| .word 0xf9400f8b | |
| .word 0x14000002 | |
| .word 0x2a160000 | |
| .word 0x39400a6a | |
| .word 0x54000041 | |
| .word 0x7100255f | |
| .word 0x71008d3f | |
| .word 0x7101753f | |
| .word 0xaa0003f3 | |
| .word 0xaa1403e0 | |
| .word 0x17fffff5 | |
| .word 0x38401409 | |
| .word 0x5100c12a | |
| .word 0x7101853f | |
| .word 0xaa0003f6 | |
| .word 0xaa0103f5 | |
| .word 0xaa1e03f0 | |
| .word 0xd2800000 | |
| .word 0x1200200a | |
| .word 0x14000005 | |
| .word 0x17ffffef | |
| .word 0x2a160129 | |
| .word 0x2a170000 | |
| .word 0x38001445 | |
| .word 0x39400a69 | |
| .word 0x5100c14b | |
| .word 0x52800019 | |
| .word 0x54000061 | |
| .word 0x54000080 | |
| .word 0x540000c0 | |
| .word 0x54000100 | |
| .word 0x54000120 | |
| .word 0x54000140 | |
| .word 0x540001a0 | |
| .word 0x540001e1 | |
| .word 0x54ffff63 | |
| .word 0x6b0b015f | |
| .word 0x7100257f | |
| .word 0x7100b93f | |
| .word 0x7101895f | |
| .word 0x7101915f | |
| .word 0x7101b15f | |
| .word 0x7101b93f | |
| .word 0x7101bd5f | |
| .word 0x7101c93f | |
| .word 0x7101c95f | |
| .word 0x7101cd3f | |
| .word 0x7101cd5f | |
| .word 0x7101d13f | |
| .word 0x7101d55f | |
| .word 0x7101e15f | |
| .word 0x7101e95f | |
| .word 0xa90157f4 | |
| .word 0xa9025ff6 | |
| .word 0xa9bc4ffe | |
| .word 0xaa0003e9 | |
| .word 0xaa0003f5 | |
| .word 0xaa0103f4 | |
| .word 0xaa0103f7 | |
| .word 0xaa1003fe | |
| .word 0xaa1403e1 | |
| .word 0xd280001a | |
| .word 0xf1000e9f | |
| .word 0xf86b6b8a | |
| .word 0xf9001bfe | |
| .word 0xf940038a | |
| .word 0xf9401bfe | |
| .word 0xf9401f89 | |
| .word 0x00000000 | |
| .word 0x110004a5 | |
| .word 0x12004800 | |
| .word 0x14000003 | |
| .word 0x14000004 | |
| .word 0x14000006 | |
| .word 0x14000008 | |
| .word 0x14000009 | |
| .word 0x1400017b | |
| .word 0x17ffffd7 | |
| .word 0x17ffffdd | |
| .word 0x17ffffdf | |
| .word 0x17fffff9 | |
| .word 0x2a0016c0 | |
| .word 0x2a0016e0 | |
| .word 0x2a004120 | |
| .word 0x2a090000 | |
| .word 0x2a091400 | |
| .word 0x2a0a3000 | |
| .word 0x2a0b2800 | |
| .word 0x2a154000 | |
| .word 0x321b014b | |
| .word 0x3940040a | |
| .word 0x5101856b | |
| .word 0x52a26000 | |
| .word 0x52aa6000 | |
| .word 0x54000081 | |
| .word 0x540000a0 | |
| .word 0x54000101 | |
| .word 0x540001c0 | |
| .word 0x540002a0 | |
| .word 0x7100657f | |
| .word 0x71009d3f | |
| .word 0x7100b53f | |
| .word 0x7100c13f | |
| .word 0x7100e93f | |
| .word 0x7101713f | |
| .word 0x71017d5f | |
| .word 0x7101893f | |
| .word 0x7101953f | |
| .word 0x7101dd3f | |
| .word 0x92800c60 | |
| .word 0x9400000c | |
| .word 0x97fffe94 | |
| .word 0x9a9f17e1 | |
| // ── top-half dictionary (128 entries, packed as 64 words) ───── | |
| // Each 16-bit entry is the upper halfword of an instruction. compress_text scans the | |
| // table with halfword loads and emits 0x80 + index followed by the instruction's low | |
| // 16 bits; the decompressor stub reads entries back with ldrh. | |
| // Two entries share each .word (on the usual little-endian layout the low 16 bits of | |
| // a word are the lower-numbered entry). | |
| half_dict: | |
| .word 0x540097ff | |
| .word 0x17ffd280 | |
| .word 0x14009100 | |
| .word 0x94007101 | |
| .word 0x3940f940 | |
| .word 0xb4005280 | |
| .word 0x54ffd100 | |
| .word 0x2a19f900 | |
| .word 0xaa007100 | |
| .word 0xb5ff1200 | |
| .word 0x2a001000 | |
| .word 0x2a182a16 | |
| .word 0x91048b0c | |
| .word 0xaa029240 | |
| .word 0x2a172a0a | |
| .word 0x38403400 | |
| .word 0xf10052a2 | |
| .word 0x52841100 | |
| .word 0x8b0b8b09 | |
| .word 0x91038b1d | |
| .word 0xaa09aa01 | |
| .word 0xb7f8aa0c | |
| .word 0x2a01dac0 | |
| .word 0x2a1a2a09 | |
| .word 0x5000321b | |
| .word 0x52855100 | |
| .word 0x700052a6 | |
| .word 0x8b0a72a3 | |
| .word 0xa9419a9f | |
| .word 0xaa0aa9bf | |
| .word 0xb500aa15 | |
| .word 0xd503b840 | |
| .word 0xf86bf860 | |
| .word 0x32161a89 | |
| .word 0x37083608 | |
| .word 0x38603800 | |
| .word 0x52a152a0 | |
| .word 0x52aa52a5 | |
| .word 0x53035302 | |
| .word 0x784072ba | |
| .word 0x8a0a7940 | |
| .word 0x8b108b00 | |
| .word 0x8b178b15 | |
| .word 0x92749101 | |
| .word 0x9a809280 | |
| .word 0xa8c19ad9 | |
| .word 0xa905a902 | |
| .word 0xaa13aa0d | |
| .word 0xcb00b4ff | |
| .word 0xcb19cb09 | |
| .word 0xd2a0d000 | |
| .word 0xd50bd379 | |
| .word 0xeb0dd61f | |
| .word 0x2a15f2c0 | |
| .word 0x3900381f | |
| .word 0x52a3528e | |
| .word 0x6b065308 | |
| .word 0x8b0f8b01 | |
| .word 0xa9009ac0 | |
| .word 0xa906a901 | |
| .word 0xaa0baa07 | |
| .word 0xaa19aa17 | |
| .word 0xb940b800 | |
| .word 0xd344cb15 | |
| // ══════════════════════════════════════════════════════════════════════════ | |
| // Decompressor stub — copied verbatim to output at CODE_START (0x78) | |
| // | |
| // Runs at the ELF entry point. Decompresses .text to a page-aligned | |
| // address (DECOMP_DEST = ceil_page(p_filesz) + 0x78) so all ADRP+ADD | |
| // encodings are preserved. Copies rodata, flushes icache, jumps. | |
| // The stub runs in place and is never overwritten. | |
| // | |
| // File layout assumed by the pointer setup below (all relative to the stub): | |
| // stub | full_dict | half_dict | compressed stream | rodata bytes | |
| // | |
| // Register roles: | |
| // x6 = stub base x7 = decompression destination (also the entry) | |
| // x8 = rodata byte count x0 = stream read cursor, x1 = output write cursor | |
| // x2 = full_dict - 4 (stream codes are 1-based) | |
| // x3 = half_dict - 256 (stream codes start at 0x80, 2 bytes per entry) | |
| // | |
| // Unsupported instructions encoded as .word constants: | |
| // dc cvau, ic ivau, dsb ish, isb, br | |
| // ══════════════════════════════════════════════════════════════════════════ | |
| _decomp_stub_start: | |
| adr x6, . // get our address | |
| // compute decompression destination + preload rodata_size | |
| ldp w7, w8, [x6, #STUB_DATA_DECOMP_DEST] | |
| add x7, x6, x7 // x7 = stub_base + offset | |
| // set up dict/stream pointers (right after stub in file) | |
| add x2, x6, #(STUB_SIZE - 4) // full_dict - 4 (1-based index → 0-based via ptr adjust) | |
| add x3, x2, #(FULL_DICT_SIZE - 256 + 4) // half_dict adjusted for 0x80-based index | |
| add x0, x2, #(FULL_DICT_SIZE + HALF_DICT_SIZE + 4) // stream | |
| mov x1, x7 // output dest | |
| // ── decompress ──────────────────────────────────────────────────────── | |
| // One record per output word: code byte 0 = end, 1..0x7E = full_dict entry, | |
| // 0x7F = raw word follows, 0x80.. = half_dict entry + low halfword follows. | |
| 3: ldrb w4, [x0], #1 | |
| cbz w4, _decomp_copy_rodata | |
| ldr w5, [x2, x4, lsl #2] // speculative full dict (harmless if half/raw) | |
| tbz w4, #7, 5f // bit 7 clear → full dict or raw | |
| ldrh w5, [x3, x4, lsl #1] // half dict: upper 16 bits | |
| ldrh w9, [x0], #2 // low 16 bits from the stream | |
| orr w5, w9, w5, lsl #16 | |
| 5: cmp w4, #0x7F | |
| b.ne 6f | |
| ldr w5, [x0], #4 // raw: overwrite with stream word | |
| 6: str w5, [x1], #4 | |
| b 3b | |
| // ── copy rodata ─────────────────────────────────────────────────────── | |
| // x0 now points just past the end marker, where the rodata bytes start. | |
| _decomp_copy_rodata: | |
| cbz x8, _decomp_flush | |
| 7: ldrb w3, [x0], #1 | |
| strb w3, [x1], #1 | |
| sub x8, x8, #1 | |
| cbnz x8, 7b | |
| // ── icache flush (x7=start, x1=end) ─────────────────────────────────── | |
| // The walk advances one 64-byte line per iteration, so it has to start on a | |
| // line boundary: starting at the unaligned x7 could leave the line holding | |
| // the last written bytes untouched. Rounding down stays within lines that | |
| // contain written data. A cache line of at most... exactly 64 bytes or a | |
| // divisor-compatible size is assumed here; CTR_EL0 is not consulted. | |
| _decomp_flush: | |
| lsr x0, x7, #6 | |
| lsl x0, x0, #6 // x0 = start rounded down to a 64-byte line | |
| 7: .word 0xd50b7b20 // dc cvau, x0 | |
| .word 0xd5033b9f // dsb ish | |
| .word 0xd50b7520 // ic ivau, x0 | |
| add x0, x0, #64 | |
| cmp x0, x1 | |
| b.lo 7b | |
| .word 0xd5033b9f // dsb ish | |
| .word 0xd5033fdf // isb | |
| // jump to decompressed entry | |
| br x7 | |
| // data block (2 x uint32, patched by assembler at output time) | |
| // read above with a single ldp, so the two words must stay adjacent and in this order | |
| _decomp_data_decomp_dest: | |
| .word 0 | |
| _decomp_data_rodata_size: | |
| .word 0 | |
| _decomp_stub_end: | |
| // computed stub constants (auto-adjust when stub changes) | |
| .equ STUB_SIZE, (_decomp_stub_end - _decomp_stub_start) | |
| .equ STUB_DATA_DECOMP_DEST, (_decomp_data_decomp_dest - _decomp_stub_start) | |
| .equ STUB_DATA_RODATA_SIZE, (_decomp_data_rodata_size - _decomp_stub_start) | |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment