Created
January 9, 2026 02:44
-
-
Save ntrrgc/250fbc28607d8e0057115165c13313ac to your computer and use it in GitHub Desktop.
Small simulator of a simplified CPU for the purposes of testing frame pointer ABIs for ARM32.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| """ | |
| Small simulator of a simplified CPU for the purposes of testing frame pointer | |
| ABIs for ARM32. | |
| """ | |
| from __future__ import annotations | |
| from abc import ABC, abstractmethod | |
| from bisect import bisect_right | |
| from dataclasses import dataclass, field | |
| from enum import StrEnum | |
| from itertools import chain | |
| from typing import Callable, Iterable, Literal, Optional, Protocol, Sequence, final | |
| INITIAL_PC = 1000 | |
| INITIAL_SP = 99000 | |
| PC = 15 | |
| LR = 14 | |
| SP = 13 | |
| IP = 12 | |
| FP = 11 | |
| REG_IX_TO_NAME = { | |
| PC: "pc", | |
| LR: "lr", | |
| SP: "sp", | |
| FP: "fp", | |
| } | |
| REG_NAME_TO_IX = {v: k for k, v in REG_IX_TO_NAME.items()} | |
| # ================= # | |
| # Program structure # | |
| # ================= # | |
| @dataclass(frozen=True) | |
| class Function: | |
| name: str | |
| addr: int | |
| callees: Sequence[str] | |
| halts: bool = False | |
| @dataclass(frozen=True) | |
| class Program: | |
| functions_by_name: dict[str, Function] | |
| instruction_memory: dict[int, Instruction] | |
| @staticmethod | |
| def compile(fns: Iterable[Function], abi: FpAbi): | |
| functions_by_name = {} | |
| instruction_memory = {} | |
| fn_name_to_addr = { | |
| fn.name: fn.addr | |
| for fn in fns | |
| } | |
| for fn in fns: | |
| functions_by_name[fn.name] = fn | |
| instructions = abi.emit_code(fn, fn_name_to_addr) | |
| addr = fn.addr | |
| for ins in instructions: | |
| assert addr not in instruction_memory, f"Memory overwritten while compiling: {addr} already contains {instruction_memory[addr]}" | |
| instruction_memory[addr] = ins | |
| addr += 4 | |
| return Program(functions_by_name, instruction_memory) | |
| def addr_to_name(self, addr: int) -> str: | |
| fns = list(sorted(self.functions_by_name.values(), key=lambda fn: fn.addr)) | |
| ix = bisect_right(fns, addr, key=lambda fn: fn.addr) - 1 | |
| assert 0 <= ix < len(fns) | |
| fn = fns[ix] | |
| return f"{fn.name}{addr - fn.addr:+}" | |
| def to_string(self): | |
| fns = list(sorted(self.functions_by_name.values(), key=lambda fn: fn.addr)) | |
| fn_ix = -1 | |
| lines: list[str] = [] | |
| for addr, ins in sorted(self.instruction_memory.items()): | |
| if fn_ix + 1 < len(fns) and addr >= fns[fn_ix + 1].addr: | |
| # Start of function | |
| fn_ix += 1 | |
| lines.append(f"{fns[fn_ix].name}({fns[fn_ix].addr}):") | |
| offset = addr - fns[fn_ix].addr | |
| lines.append(f"{offset:+6}: {ins}") | |
| return "\n".join(lines) | |
| @dataclass | |
| class Flags: | |
| is_zero: bool = True | |
| @dataclass | |
| class Machine: | |
| program: Program | |
| data_memory: dict[int, int] = field(default_factory=lambda: {}) | |
| halted: bool = field(default=False) | |
| flags: Flags = field(default_factory=Flags) | |
| regs: list[int] = field(default_factory=lambda: [ | |
| { | |
| PC: INITIAL_PC, | |
| SP: INITIAL_SP, | |
| }.get(reg_ix, 0) for reg_ix in range(16) | |
| ]) | |
| def run(self, check_consistency_fn: Callable[[Machine], None]): | |
| while not self.halted: | |
| pc = self.regs[PC] | |
| ins = self.program.instruction_memory[pc] | |
| # For consistency with typical debuggers, PC will already point to | |
| # the next instruction. | |
| machine.regs[PC] += 4 | |
| check_consistency_fn(self) | |
| ins.run(self) | |
| def fmt_ptr(self, addr: int) -> str: | |
| if addr in self.program.instruction_memory: | |
| return f"{addr}({self.program.addr_to_name(addr)})" | |
| return f"{addr}" | |
| def data_memory_as_str(self): | |
| return "\n".join( | |
| f"{self.fmt_ptr(addr)}: {val}" | |
| for addr, val in sorted(self.data_memory.items(), reverse=True) | |
| ) | |
| # ============ # | |
| # Instructions # | |
| # ============ # | |
| @dataclass(frozen=True) | |
| class Instruction(ABC): | |
| condition: Literal["always", "if_zero"] = field(default="always", kw_only=True) | |
| def is_condition_true(self, machine: Machine): | |
| if self.condition == "always": | |
| return True | |
| elif self.condition == "if_zero": | |
| return machine.flags.is_zero | |
| else: | |
| raise RuntimeError(f"Unexpected condition: {self.condition}") | |
| @final | |
| def run(self, machine: Machine) -> None: | |
| if self.is_condition_true(machine): | |
| self.run_inner(machine) | |
| @abstractmethod | |
| def run_inner(self, machine: Machine) -> None: | |
| raise NotImplementedError() | |
| @final | |
| @dataclass(frozen=True) | |
| class Add(Instruction): | |
| dst_reg: int | |
| src_reg: int | |
| immediate: int | |
| def run_inner(self, machine: Machine) -> None: | |
| machine.regs[self.dst_reg] = machine.regs[self.src_reg] + self.immediate | |
| @final | |
| @dataclass(frozen=True) | |
| class Push(Instruction): | |
| regs: set[int] | |
| def run_inner(self, machine: Machine) -> None: | |
| for reg_ix in sorted(self.regs, reverse=True): | |
| machine.regs[SP] -= 4 | |
| machine.data_memory[machine.regs[SP]] = machine.regs[reg_ix] | |
| @final | |
| @dataclass(frozen=True) | |
| class Pop(Instruction): | |
| regs: set[int] | |
| def run_inner(self, machine: Machine) -> None: | |
| new_sp_value: Optional[int] = None | |
| for reg_ix in sorted(self.regs): | |
| if reg_ix != SP: | |
| machine.regs[reg_ix] = machine.data_memory[machine.regs[SP]] | |
| else: | |
| new_sp_value = machine.data_memory[machine.regs[SP]] | |
| machine.regs[SP] += 4 | |
| if new_sp_value is not None: | |
| machine.regs[SP] = new_sp_value | |
| @final | |
| @dataclass(frozen=True) | |
| class BranchToAddr(Instruction): | |
| target_address: int | |
| link: bool | |
| def run_inner(self, machine: Machine) -> None: | |
| if self.link: | |
| machine.regs[LR] = machine.regs[PC] | |
| machine.regs[PC] = self.target_address | |
| @final | |
| @dataclass(frozen=True) | |
| class BranchToReg(Instruction): | |
| target_reg: int | |
| link: bool | |
| def run_inner(self, machine: Machine) -> None: | |
| if self.link: | |
| machine.regs[LR] = machine.regs[PC] | |
| machine.regs[PC] = machine.regs[self.target_reg] | |
| @final | |
| @dataclass(frozen=True) | |
| class NoOp(Instruction): | |
| comment: str | |
| def run_inner(self, machine: Machine) -> None: | |
| pass | |
| @final | |
| @dataclass(frozen=True) | |
| class Halt(Instruction): | |
| def run_inner(self, machine: Machine) -> None: | |
| machine.halted = True | |
| # ======== # | |
| # ABIs # | |
| # ======== # | |
| class FpAbi(ABC): | |
| def emit_prologue(self, fn: Function) -> Sequence[Instruction]: | |
| ... | |
| def emit_epilogue(self, fn: Function) -> Sequence[Instruction]: | |
| ... | |
| def emit_code(self, fn: Function, fn_name_to_addr: dict[str, int]) -> Sequence[Instruction]: | |
| prolog = self.emit_prologue(fn) | |
| epilog = self.emit_epilogue(fn) | |
| body: list[Instruction] = [NoOp(f"start of body of {fn.name}")] | |
| for callee_name in fn.callees: | |
| body.append(BranchToAddr(fn_name_to_addr[callee_name], link=True)) | |
| body.append(NoOp(f"end of body of {fn.name}")) | |
| return tuple(chain(prolog, body, epilog if not fn.halts else [Halt()])) | |
| class GccAbi(FpAbi): | |
| def __init__(self, push_lr_on_leafs: bool) -> None: | |
| super().__init__() | |
| # Will fail during unwinding, because it's makes it non-unwindable! | |
| self.push_lr_on_leafs = push_lr_on_leafs | |
| def emit_prologue(self, fn: Function) -> Sequence[Instruction]: | |
| if self.push_lr_on_leafs or fn.callees: | |
| return [ | |
| Push({FP, LR}), | |
| Add(FP, SP, 4), | |
| ] | |
| else: | |
| # Why gcc, why | |
| return [ | |
| Push({FP}), | |
| Add(FP, SP, 0), | |
| ] | |
| def emit_epilogue(self, fn: Function) -> Sequence[Instruction]: | |
| if self.push_lr_on_leafs or fn.callees: | |
| return [ | |
| Pop({FP, PC}), | |
| ] | |
| else: | |
| return [ | |
| Add(SP, FP, 0), | |
| Pop({FP}), | |
| BranchToReg(LR, link=False) | |
| ] | |
| def compute_callchain(self, machine: Machine) -> Sequence[int]: | |
| call_chain = [machine.regs[PC]] | |
| fp = machine.regs[FP] | |
| while fp != 0: | |
| lr = machine.data_memory[fp] | |
| if lr == 0: | |
| break # this function cannot return | |
| call_chain.append(lr) | |
| fp = machine.data_memory[fp - 4] | |
| call_chain.reverse() | |
| return call_chain | |
| class AapcsAbi(FpAbi): | |
| def __init__(self) -> None: | |
| super().__init__() | |
| def emit_prologue(self, fn: Function) -> Sequence[Instruction]: | |
| return [ | |
| Push({FP, LR}), | |
| Add(FP, SP, 0), | |
| ] | |
| def emit_epilogue(self, fn: Function) -> Sequence[Instruction]: | |
| return [ | |
| Pop({FP, PC}), | |
| ] | |
| def compute_callchain(self, machine: Machine) -> Sequence[int]: | |
| call_chain = [machine.regs[PC]] | |
| fp = machine.regs[FP] | |
| is_first = True | |
| while fp != 0: | |
| lr = machine.data_memory[fp + 4] | |
| if is_first and lr != machine.regs[LR]: | |
| # We're in the short window after the BL instruction has been | |
| # executed but before the resulting new LR value has been pushed | |
| # to the stack by the callee. Add LR to the call chain. | |
| call_chain.append(machine.regs[LR]) | |
| if lr == 0: | |
| break # this function cannot return | |
| call_chain.append(lr) | |
| fp = machine.data_memory[fp] | |
| is_first = False | |
| call_chain.reverse() | |
| return call_chain | |
| class ApcsAbi(FpAbi): | |
| def __init__(self) -> None: | |
| super().__init__() | |
| def emit_prologue(self, fn: Function) -> Sequence[Instruction]: | |
| return [ | |
| Add(IP, SP, 0), | |
| Push({FP, IP, LR, PC}), | |
| Add(FP, IP, -4), | |
| ] | |
| def emit_epilogue(self, fn: Function) -> Sequence[Instruction]: | |
| return [ | |
| Pop({FP, SP, PC}), | |
| ] | |
| def compute_callchain(self, machine: Machine) -> Sequence[int]: | |
| call_chain = [machine.regs[PC]] | |
| fp = machine.regs[FP] | |
| while fp != 0: | |
| lr = machine.data_memory[fp - 4] | |
| if lr == 0: | |
| break # this function cannot return | |
| call_chain.append(lr) | |
| fp = machine.data_memory[fp - 12] | |
| call_chain.reverse() | |
| return call_chain | |
| fns = [ | |
| Function("main", INITIAL_PC, callees=["leaf1", "fn1"], halts=True), | |
| Function("leaf1", 2000, callees=[]), | |
| Function("leaf2", 3000, callees=[]), | |
| Function("fn1", 4000, callees=["fn2"]), | |
| Function("fn2", 5000, callees=["leaf2"]), | |
| ] | |
| #abi = GccAbiStrictLeafs(push_lr_on_leafs=False) | |
| abi = ApcsAbi() | |
| program = Program.compile(fns, abi) | |
| print(program.to_string()) | |
| machine = Machine(program) | |
| def check_consistency(machine: Machine): | |
| # print(f"\n-- Running {machine.fmt_ptr(machine.regs[PC] - 4)} FP={machine.regs[FP]} SP={machine.regs[SP]} LR={machine.regs[LR]}: {machine.program.instruction_memory[machine.regs[PC]-4]}") | |
| # print(machine.data_memory_as_str()) | |
| call_chain = abi.compute_callchain(machine) | |
| print("Call chain: " + " <- ".join( | |
| machine.fmt_ptr(lr - 4) | |
| for lr in call_chain | |
| )) | |
| machine.run(check_consistency) |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment