Iftimie/picoasm.py

## picoasm.py
#!/usr/bin/env python3
"""
picoasm.py — tiny assembler for the 8-bit MCU ROM

Instruction format:
  IR[7:5] = opcode (3 bits)
  IR[4:0] = operand (5 bits)

Opcodes:
  NOP = 000
  OUT = 001
  JMP = 010   operand = addr (0..7) in IR[2:0]
  LDA = 011   operand = imm (0..31)
  ADD = 100   operand = imm (0..31)
  CMP = 101   operand = imm (0..31)   ; sets Z flag (A == imm)
  JZ  = 110   operand = addr (0..7) in IR[2:0]  ; jump if Z == 1

Output format:
  0: b0 b1 b2 b3 b4 b5 b6 b7
  8: b8 b9 ...
"""

import re
from typing import Dict, List

# Mnemonic -> 3-bit opcode; assemble() places this value in bits 7..5 of
# each encoded byte.
OPCODES = dict(
    NOP=0b000,
    OUT=0b001,
    JMP=0b010,
    LDA=0b011,
    ADD=0b100,
    CMP=0b101,
    JZ=0b110,
)

def strip_comment(line: str) -> str:
    """Return *line* without its trailing comment and surrounding whitespace.

    A comment starts at the first ``;`` or ``#``, whichever comes first, and
    runs to the end of the line.
    """
    before_semicolon = line.partition(";")[0]
    code = before_semicolon.partition("#")[0]
    return code.strip()

def tokenize(line: str) -> List[str]:
    """Split *line* into tokens separated by commas and/or whitespace.

    Separators are never returned, and runs of separators produce no empty
    tokens.
    """
    # Matching maximal runs of non-separator characters yields the same
    # tokens as splitting on separator runs and discarding empty pieces.
    return re.findall(r"[^,\s]+", line)

def parse_int(s: str) -> int:
    """Convert an integer literal to ``int``.

    A ``0x`` prefix selects hexadecimal and ``0b`` selects binary (prefix
    case is ignored); anything else is parsed as decimal.

    Raises:
        ValueError: If *s* is not a valid literal in the selected base.
    """
    lowered = s.lower()
    for prefix, base in (("0x", 16), ("0b", 2)):
        if lowered.startswith(prefix):
            return int(s, base)
    return int(s, 10)

def first_pass(lines: List[str]) -> Dict[str, int]:
    """Collect label addresses by walking the program once.

    Every non-empty line (after comment stripping and label removal) is
    counted as one instruction; a label maps to the count reached when it is
    encountered.

    Args:
        lines: Program source split into lines.

    Returns:
        Mapping of label name to address.

    Raises:
        ValueError: If a label is not a valid identifier or is defined twice.
    """
    table: Dict[str, int] = {}
    address = 0

    for lineno, raw in enumerate(lines, start=1):
        text = strip_comment(raw)
        name, colon, tail = text.partition(":")

        if colon:
            name = name.strip()
            if re.fullmatch(r"[A-Za-z_][A-Za-z0-9_]*", name) is None:
                raise ValueError(f"Line {lineno}: invalid label '{name}'")
            if name in table:
                raise ValueError(f"Line {lineno}: duplicate label '{name}'")
            table[name] = address
            # Whatever follows the label may still be an instruction.
            text = tail.strip()

        # Blank, comment-only and label-only lines occupy no address.
        if text:
            address += 1

    return table

def assemble(source: str) -> Dict[int, int]:
    """Assemble program text into a mapping of address to encoded byte.

    Each instruction becomes one byte: the 3-bit opcode is placed in bits
    7..5 and the operand in bits 4..0.  Labels collected by ``first_pass``
    may be used wherever a numeric operand is accepted.

    Args:
        source: Full program text.  Each line may hold an optional
            ``label:`` followed by at most one instruction; ``;`` and ``#``
            start a comment.

    Returns:
        Dict mapping consecutive addresses, starting at 0, to byte values.

    Raises:
        ValueError: On an invalid or duplicate label, a missing or unknown
            instruction, a wrong operand count, an operand that is neither
            a known label nor an integer literal, or an operand outside the
            range allowed for the instruction.  Messages are prefixed with
            ``Line N:``.
    """
    lines = source.splitlines()
    labels = first_pass(lines)

    def resolve(tok: str, lineno: int) -> int:
        # Labels take precedence over numeric literals.
        if tok in labels:
            return labels[tok]
        try:
            return parse_int(tok)
        except ValueError:
            # int()'s own message has no source position; report the line
            # number like every other diagnostic raised here.
            raise ValueError(
                f"Line {lineno}: invalid operand '{tok}'"
            ) from None

    mem: Dict[int, int] = {}
    pc = 0

    for lineno, raw in enumerate(lines, start=1):
        line = strip_comment(raw)
        if ":" in line:
            # The label itself was validated and recorded by first_pass.
            line = line.split(":", 1)[1].strip()
        if not line:
            continue

        toks = tokenize(line)
        if not toks:
            # A line made only of separators (e.g. ",") yields no tokens;
            # indexing toks[0] would otherwise crash with IndexError.
            raise ValueError(f"Line {lineno}: missing instruction")

        inst = toks[0].upper()
        if inst not in OPCODES:
            raise ValueError(f"Line {lineno}: unknown instruction '{inst}'")

        opcode = OPCODES[inst]
        operand = 0

        if inst in ("NOP", "OUT"):
            if len(toks) != 1:
                raise ValueError(f"Line {lineno}: {inst} takes no operands")

        elif inst in ("LDA", "ADD", "CMP"):
            if len(toks) != 2:
                raise ValueError(f"Line {lineno}: {inst} takes one operand")
            operand = resolve(toks[1], lineno)
            if not (0 <= operand <= 31):
                raise ValueError(f"Line {lineno}: immediate out of range 0..31")

        elif inst in ("JMP", "JZ"):
            if len(toks) != 2:
                raise ValueError(f"Line {lineno}: {inst} takes one operand")
            operand = resolve(toks[1], lineno)
            if not (0 <= operand <= 7):
                raise ValueError(f"Line {lineno}: {inst} addr out of range 0..7")

        # Pack opcode into the top three bits and operand into the low five.
        mem[pc] = ((opcode & 0b111) << 5) | (operand & 0b11111)
        pc += 1

    return mem

def format_output(mem: Dict[int, int], group: int = 8) -> str:
    """Render *mem* as text rows of ``group`` decimal byte values.

    Each row has the form ``<base address>: v0 v1 ...``.  Addresses missing
    from *mem* are printed as 0, so the last row is always padded to a full
    group.

    Args:
        mem: Mapping of address to byte value.
        group: Number of values per row.

    Returns:
        The rows joined by newlines, or an empty string when *mem* is empty.
    """
    if not mem:
        return ""

    end = max(mem) + 1
    return "\n".join(
        f"{start}: "
        + " ".join(str(mem.get(addr, 0)) for addr in range(start, start + group))
        for start in range(0, end, group)
    )

# Command-line entry point: assemble the file named by the first argument
# and print the formatted ROM contents to stdout.
if __name__ == "__main__":
    import sys

    if len(sys.argv) < 2:
        print("Usage: picoasm.py <filename>", file=sys.stderr)
        sys.exit(1)

    try:
        with open(sys.argv[1], "r", encoding="utf-8") as f:
            source = f.read()
        mem = assemble(source)
    except (OSError, ValueError) as err:
        # Unreadable input and assembler diagnostics are user errors: show a
        # one-line message rather than a traceback.  The exit status stays 1,
        # the same as for an uncaught exception.
        print(f"picoasm: {err}", file=sys.stderr)
        sys.exit(1)

    print(format_output(mem))
	#!/usr/bin/env python3
	"""
	picoasm.py — tiny assembler for the 8-bit MCU ROM

	Instruction format:
	IR[7:5] = opcode (3 bits)
	IR[4:0] = operand (5 bits)

	Opcodes:
	NOP = 000
	OUT = 001
	JMP = 010 operand = addr (0..7) in IR[2:0]
	LDA = 011 operand = imm (0..31)
	ADD = 100 operand = imm (0..31)
	CMP = 101 operand = imm (0..31) ; sets Z flag (A == imm)
	JZ = 110 operand = addr (0..7) in IR[2:0] ; jump if Z == 1

	Output format:
	0: b0 b1 b2 b3 b4 b5 b6 b7
	8: b8 b9 ...
	"""

	import re
	from typing import Dict, List

	OPCODES = {
	"NOP": 0b000,
	"OUT": 0b001,
	"JMP": 0b010,
	"LDA": 0b011,
	"ADD": 0b100,
	"CMP": 0b101, # NEW
	"JZ": 0b110, # NEW
	}

	def strip_comment(line: str) -> str:
	for c in (";", "#"):
	if c in line:
	line = line.split(c, 1)[0]
	return line.strip()

	def tokenize(line: str) -> List[str]:
	return [t for t in re.split(r"[,\s]+", line) if t]

	def parse_int(s: str) -> int:
	if s.lower().startswith("0x"):
	return int(s, 16)
	if s.lower().startswith("0b"):
	return int(s, 2)
	return int(s, 10)

	def first_pass(lines: List[str]) -> Dict[str, int]:
	labels: Dict[str, int] = {}
	pc = 0

	for lineno, raw in enumerate(lines, start=1):
	line = strip_comment(raw)
	if not line:
	continue

	if ":" in line:
	label, rest = line.split(":", 1)
	label = label.strip()
	if not re.fullmatch(r"[A-Za-z_][A-Za-z0-9_]*", label):
	raise ValueError(f"Line {lineno}: invalid label '{label}'")
	if label in labels:
	raise ValueError(f"Line {lineno}: duplicate label '{label}'")
	labels[label] = pc
	line = rest.strip()
	if not line:
	continue

	pc += 1

	return labels

	def assemble(source: str) -> Dict[int, int]:
	lines = source.splitlines()
	labels = first_pass(lines)

	mem: Dict[int, int] = {}
	pc = 0

	for lineno, raw in enumerate(lines, start=1):
	line = strip_comment(raw)
	if not line:
	continue

	if ":" in line:
	_, line = line.split(":", 1)
	line = line.strip()
	if not line:
	continue

	toks = tokenize(line)
	inst = toks[0].upper()

	if inst not in OPCODES:
	raise ValueError(f"Line {lineno}: unknown instruction '{inst}'")

	opcode = OPCODES[inst]
	operand = 0

	def resolve(tok: str) -> int:
	if tok in labels:
	return labels[tok]
	return parse_int(tok)

	if inst in ("NOP", "OUT"):
	if len(toks) != 1:
	raise ValueError(f"Line {lineno}: {inst} takes no operands")

	elif inst in ("LDA", "ADD", "CMP"):
	if len(toks) != 2:
	raise ValueError(f"Line {lineno}: {inst} takes one operand")
	operand = resolve(toks[1])
	if not (0 <= operand <= 31):
	raise ValueError(f"Line {lineno}: immediate out of range 0..31")

	elif inst in ("JMP", "JZ"):
	if len(toks) != 2:
	raise ValueError(f"Line {lineno}: {inst} takes one operand")
	operand = resolve(toks[1])
	if not (0 <= operand <= 7):
	raise ValueError(f"Line {lineno}: {inst} addr out of range 0..7")

	byte = ((opcode & 0b111) << 5) | (operand & 0b11111)
	mem[pc] = byte
	pc += 1

	return mem

	def format_output(mem: Dict[int, int], group: int = 8) -> str:
	if not mem:
	return ""

	size = max(mem.keys()) + 1
	lines = []

	for base in range(0, size, group):
	vals = [mem.get(base + i, 0) for i in range(group)]
	lines.append(f"{base}: " + " ".join(str(v) for v in vals))

	return "\n".join(lines)

	if __name__ == "__main__":
	import sys
	if len(sys.argv) < 2:
	print("Usage: picoasm.py <filename>", file=sys.stderr)
	sys.exit(1)
	with open(sys.argv[1], "r", encoding="utf-8") as f:
	source = f.read()
	mem = assemble(source)
	print(format_output(mem))
No results found