Created
April 17, 2026 15:14
-
-
Save damieng/4a9bd4c4cf300c6df767021c50a300ce to your computer and use it in GitHub Desktop.
Z80 linter
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| #!/usr/bin/env python3 | |
| """ | |
| z80lint.py -- Z80 assembler linter for CP/M 3.1 / ZX Spectrum +3 project. | |
| Proper instruction parser with effects table, replacing the old regex linter. | |
| Checks: | |
| 1. Stack balance: every RET must be reached with net push/pop depth of zero. | |
| 2. Register clobber: reads of registers known clobbered by a preceding CALL | |
| without intervening restore. | |
| 3. Dead stores: writes between push/pop that are discarded by the pop. | |
| Routine header format: | |
| ; routine_name - Description | |
| ; In: A = input | |
| ; Out: HL = result | |
| ; Clobbers: AF, BC, DE | |
| ; Stack: modifies <- suppresses stack balance check | |
| Suppression: | |
| Add '; lint: ignore' to any line to suppress warnings on that line. | |
| Usage: | |
| python3 tools/z80lint.py [file.asm ...] | |
| (default: scans src/bios/*.asm src/bdos/*.asm src/ccp/*.asm) | |
| Exit code: 0 = clean, 1 = issues found. | |
| """ | |
| import re | |
| import sys | |
| from pathlib import Path | |
| # ------------------------------------------------------------------------- | |
| # Register constants | |
| # ------------------------------------------------------------------------- | |
# 8-bit registers recognised as plain operands.
SINGLES = set('abcdehlir')

# 16-bit register pairs.
PAIRS = set('bc de hl sp af ix iy'.split())

# Condition codes accepted by jp/jr/call/ret.
CONDITIONS = set('z nz c nc p m pe po'.split())

# Pair name -> tuple of the sub-register names tracked for it.
PAIR_EXPAND = dict(
    af=('a', 'f'),
    bc=('b', 'c'),
    de=('d', 'e'),
    hl=('h', 'l'),
    ix=('ixh', 'ixl'),
    iy=('iyh', 'iyl'),
    sp=('sp',),
)

# For annotation parsing: upper-case pair names mapped to the set of
# sub-registers we track (same content as PAIR_EXPAND, different shape).
ANNOT_PAIR_MAP = {
    pair.upper(): set(parts) for pair, parts in PAIR_EXPAND.items()
}
| # ------------------------------------------------------------------------- | |
| # Operand classifier | |
| # ------------------------------------------------------------------------- | |
class Op:
    """One classified instruction operand.

    Kinds named by this file: 'reg8', 'reg16', 'indirect_pair', 'indexed',
    'indirect_mem', 'imm', 'cond', 'symbol', 'port_c', 'port_imm'.
    """

    __slots__ = ('kind', 'value', 'inner')

    def __init__(self, kind, value=None, inner=None):
        # value: register name or condition code (lowercase)
        # inner: base pair, used by indexed operands
        self.kind, self.value, self.inner = kind, value, inner

    def __repr__(self):
        # The third field is shown only when an inner/base pair is set.
        shown = [self.kind, self.value]
        if self.inner:
            shown.append(self.inner)
        return "Op(" + ", ".join(str(field) for field in shown) + ")"
# Regex patterns for operand classification
_RE_INDIRECT_PAIR = re.compile(r'^\((?:hl|bc|de|sp)\)$', re.I)
# (IX+d) / (IY-d), and also the displacement-free forms (IX) / (IY)
_RE_INDEXED = re.compile(r'^\((ix|iy)\s*(?:[+\-]|\))', re.I)
_RE_INDIRECT_MEM = re.compile(r'^\([^)]+\)$')
_RE_PORT_C = re.compile(r'^\(c\)$', re.I)


def classify_operand(s):
    """Classify a single operand string into an Op.

    The checks run in a fixed order because several spellings are
    ambiguous: pairs before singles, (C) before the generic indirect
    forms, and the bare condition codes last.  A bare 'c' is always
    reported as the C register here; callers that know the mnemonic
    decide whether it is really the carry condition.

    Args:
        s: raw operand text (surrounding whitespace is ignored).

    Returns:
        An Op whose kind is one of 'reg16', 'reg8', 'port_c',
        'indirect_pair', 'indexed', 'cond', 'indirect_mem' or 'imm'.
    """
    s = s.strip()
    low = s.lower()

    # Shadow accumulator as written in EX AF, AF' -- track it as AF so the
    # exchange is recognised instead of falling through to 'imm'.
    if low == "af'":
        return Op('reg16', 'af')

    # Register pairs first (before checking singles, since 'c' is both)
    if low in PAIRS:
        return Op('reg16', low)

    # 8-bit registers, including the IX/IY halves
    if low in SINGLES or low in ('ixh', 'ixl', 'iyh', 'iyl'):
        return Op('reg8', low)

    # (C) for I/O
    if _RE_PORT_C.match(low):
        return Op('port_c')

    # (HL), (BC), (DE), (SP) indirect
    if _RE_INDIRECT_PAIR.match(low):
        return Op('indirect_pair', low[1:-1])

    # (IX+d), (IY+d), (IX), (IY)
    m = _RE_INDEXED.match(low)
    if m:
        return Op('indexed', inner=m.group(1))

    # Condition codes (excluding 'c' which is handled contextually)
    if low in CONDITIONS and low != 'c':
        return Op('cond', low)

    # (nn) - indirect memory
    if _RE_INDIRECT_MEM.match(s):
        return Op('indirect_mem')

    # Immediate/symbol
    return Op('imm')
def classify_operands(mnemonic, raw_operands):
    """
    Classify an operand list, resolving the condition-code ambiguity.

    For jp/jr/call/ret the first operand is treated as a condition code
    whenever its text is one of CONDITIONS (this includes 'c', which would
    otherwise classify as the C register).  Every other operand goes
    through classify_operand unchanged.

    Args:
        mnemonic: lowercase instruction mnemonic.
        raw_operands: list of raw operand strings.

    Returns:
        List of Op objects, one per raw operand, in order.
    """
    takes_condition = mnemonic in ('jp', 'jr', 'call', 'ret')
    ops = []
    for i, raw in enumerate(raw_operands):
        if i == 0 and takes_condition:
            low = raw.strip().lower()
            # 'c' is a member of CONDITIONS, so this single test already
            # covers the carry condition; no separate 'c' case is needed.
            if low in CONDITIONS:
                ops.append(Op('cond', low))
                continue
        ops.append(classify_operand(raw))
    return ops
| # ------------------------------------------------------------------------- | |
| # Assembly line parser | |
| # ------------------------------------------------------------------------- | |
| # Directives we skip (not instructions) | |
# Assembler directives: lines whose first word is one of these are skipped
# by the instruction checks.
DIRECTIVES = set("""
    org equ ds dw db defs defw defb defm
    phase dephase block align if else endif ifdef
    ifndef macro endm include incbin device assert
    display struct ends dup edup module endmodule
    define undefine export page slot size lua
    endlua end output outend fpos opt byte word
    savenex savesna savebin savehob emptytap savetap
    shellexec hex
""".split())
def strip_comment(line):
    """Strip a trailing ';' comment, respecting quoted strings.

    A ';' inside a single- or double-quoted literal is kept.  An
    apostrophe that directly follows an identifier character (as in the
    shadow register AF') is NOT treated as an opening quote; otherwise
    "ex af, af' ; note" would swallow the comment into a bogus string.

    Args:
        line: one raw source line.

    Returns:
        The code part of the line with surrounding whitespace removed
        ('' for blank or comment-only lines).
    """
    kept = []
    quote = None   # the quote character currently open, if any
    prev = ''      # previous character, used to spot the AF' apostrophe
    for ch in line:
        if quote is None:
            if ch == ';':
                break
            if ch == '"' or (ch == "'" and not (prev.isalnum() or prev == '_')):
                quote = ch
        elif ch == quote:
            quote = None
        kept.append(ch)
        prev = ch
    return ''.join(kept).strip()
def split_operands(text):
    """Split an operand string on top-level commas.

    Commas inside parentheses or inside quoted character/string literals
    do not split, and parentheses inside quotes do not affect nesting
    (so "ld a, ','" and "cp '('" are handled).  An apostrophe directly
    after an identifier character (AF') is not an opening quote.

    Args:
        text: everything after the mnemonic, comment already removed.

    Returns:
        List of operand strings, each stripped.  A trailing empty operand
        is dropped; empty operands elsewhere are kept as ''.
    """
    parts = []
    current = []
    depth = 0
    quote = None   # the quote character currently open, if any
    prev = ''
    for ch in text:
        if quote is not None:
            if ch == quote:
                quote = None
        elif ch == '"' or (ch == "'" and not (prev.isalnum() or prev == '_')):
            quote = ch
        elif ch == '(':
            depth += 1
        elif ch == ')':
            # Never go negative: a stray ')' must not disable splitting.
            depth = max(depth - 1, 0)
        elif ch == ',' and depth == 0:
            parts.append(''.join(current).strip())
            current = []
            prev = ch
            continue
        current.append(ch)
        prev = ch
    tail = ''.join(current).strip()
    if tail:
        parts.append(tail)
    return parts
def parse_line(raw):
    """
    Parse a single assembly line.

    Returns (label, mnemonic, operands_str, operands_list, is_directive,
    is_local) where label may be None and mnemonic may be None.  For
    directives the operand list is always empty.  is_local is True when a
    colon-terminated label starts with '.'.
    """
    code = strip_comment(raw)
    if not code:
        return None, None, '', [], False, False

    label = None
    is_local = False

    # Check for label (with colon)
    m = re.match(r'^(\.?\w+)\s*:', code)
    if m:
        label = m.group(1)
        is_local = label.startswith('.')
        code = code[m.end():].strip()
        if not code:
            return label, None, '', [], False, is_local

    # Split mnemonic and operands
    parts = code.split(None, 1)
    mnemonic = parts[0].lower()
    rest = parts[1] if len(parts) > 1 else ''

    # Handle sjasmplus label-without-colon followed by equ.  Require EQU as
    # a whole word: a plain prefix test would turn "call equalize" or
    # "jp equ_handler" into an EQU directive with a label named call/jp.
    if re.match(r'equ\b', rest, re.I):
        return parts[0], 'equ', rest[3:].strip(), [], True, False

    if mnemonic in DIRECTIVES:
        return label, mnemonic, rest, [], True, is_local

    operands = split_operands(rest) if rest else []
    return label, mnemonic, rest, operands, False, is_local
| # ------------------------------------------------------------------------- | |
| # Instruction effects resolver | |
| # ------------------------------------------------------------------------- | |
def _pair_regs(pair):
    """Return the set of sub-register names tracked for *pair*.

    Names that are not keys of PAIR_EXPAND map to a one-element set
    containing the name itself.
    """
    try:
        members = PAIR_EXPAND[pair]
    except KeyError:
        members = (pair,)
    return set(members)
def get_effects(mnemonic, ops):
    """
    Given a mnemonic and classified operands, return (reads, writes).

    Args:
        mnemonic: lowercase instruction mnemonic (e.g. 'ld', 'push').
        ops: list of classified operands; only their ``kind``, ``value``
            and ``inner`` attributes are used.

    Returns:
        (reads, writes): two sets of lowercase register names drawn from
        a, b, c, d, e, h, l, f, sp, ixh, ixl, iyh, iyl, i, r.
        Memory is not modelled; for a memory operand only the address
        registers are recorded (as reads).  Unknown mnemonics and
        unrecognised operand shapes yield whatever has been collected so
        far, which is usually two empty sets.
    """
    reads = set()
    writes = set()

    def add_op_reads(op):
        """Add registers read by accessing this operand."""
        if op.kind == 'reg8':
            reads.add(op.value)
        elif op.kind in ('reg16', 'indirect_pair'):
            reads.update(_pair_regs(op.value))
        elif op.kind == 'indexed':
            reads.update(_pair_regs(op.inner))
        elif op.kind == 'port_c':
            reads.update(('b', 'c'))

    def add_dst_writes(op):
        """Add register writes for a destination operand."""
        if op.kind == 'reg8':
            writes.add(op.value)
        elif op.kind == 'reg16':
            writes.update(_pair_regs(op.value))
        elif op.kind in ('indirect_pair', 'indexed'):
            # Writing to memory: the address register is READ not written
            add_op_reads(op)

    # ---- LD variants ----
    if mnemonic == 'ld':
        if len(ops) == 2:
            dst, src = ops
            if dst.kind == 'reg8':
                # LD r, src
                writes.add(dst.value)
                add_op_reads(src)
            elif dst.kind in ('indirect_pair', 'indexed'):
                # LD (HL), src / LD (IX+d), src
                add_dst_writes(dst)
                add_op_reads(src)
            elif dst.kind == 'reg16':
                # LD SP,HL/IX/IY / LD pair,(nn) / LD pair,nn: the pair is
                # written; a register source is read, (nn) and immediates
                # contribute no register reads.
                writes.update(_pair_regs(dst.value))
                add_op_reads(src)
            elif dst.kind == 'indirect_mem':
                # LD (nn), A / LD (nn), pair
                add_op_reads(src)
        return reads, writes

    # ---- ALU: ADD/ADC/SUB/SBC ----
    if mnemonic in ('add', 'adc', 'sub', 'sbc'):
        uses_carry = mnemonic in ('adc', 'sbc')
        if len(ops) == 2 and ops[0].kind == 'reg16':
            # 16-bit: ADD HL,pair / ADC HL,pair / SBC HL,pair
            dst_regs = _pair_regs(ops[0].value)
            reads.update(dst_regs)
            add_op_reads(ops[1])
            writes.update(dst_regs)
            writes.add('f')
            if uses_carry:
                reads.add('f')
            return reads, writes
        # 8-bit ALU: "ADD A, x" or the implicit-A form "SUB x"
        if len(ops) in (1, 2):
            reads.add('a')
            add_op_reads(ops[-1])
        writes.update(('a', 'f'))
        if uses_carry:
            reads.add('f')
        return reads, writes

    # ---- Logic: AND/OR/XOR/CP ----
    if mnemonic in ('and', 'or', 'xor', 'cp'):
        if len(ops) >= 1:
            src = ops[-1]
            src_is_a = src.kind == 'reg8' and src.value == 'a'
            if mnemonic == 'xor' and src_is_a:
                # XOR A: sets A=0, does NOT meaningfully read A
                writes.update(('a', 'f'))
            elif mnemonic == 'or' and src_is_a:
                # OR A: tests A, writes F only (A value unchanged)
                reads.add('a')
                writes.add('f')
            elif mnemonic == 'cp':
                # CP x: reads A, reads x, writes F only
                reads.add('a')
                add_op_reads(src)
                writes.add('f')
            else:
                # AND/OR/XOR general
                reads.add('a')
                add_op_reads(src)
                writes.update(('a', 'f'))
        return reads, writes

    # ---- INC/DEC ----
    if mnemonic in ('inc', 'dec'):
        if len(ops) == 1:
            op = ops[0]
            if op.kind == 'reg16':
                # INC/DEC pair: NO flags affected
                regs = _pair_regs(op.value)
                reads.update(regs)
                writes.update(regs)
            elif op.kind == 'reg8':
                # INC/DEC r: flags affected
                reads.add(op.value)
                writes.add(op.value)
                writes.add('f')
            elif op.kind in ('indirect_pair', 'indexed'):
                # INC/DEC (HL) / (IX+d): reads address, writes flags
                add_op_reads(op)
                writes.add('f')
        return reads, writes

    # ---- PUSH/POP ----
    if mnemonic == 'push':
        if ops and ops[0].kind == 'reg16':
            reads.update(_pair_regs(ops[0].value))
        return reads, writes

    if mnemonic == 'pop':
        if ops and ops[0].kind == 'reg16':
            writes.update(_pair_regs(ops[0].value))
        return reads, writes

    # ---- Rotate accumulator ----
    if mnemonic in ('rlca', 'rrca', 'rla', 'rra'):
        reads.add('a')
        writes.update(('a', 'f'))
        if mnemonic in ('rla', 'rra'):
            # Rotate through carry: the old carry is an input
            reads.add('f')
        return reads, writes

    # ---- Rotate digit: RLD/RRD move a nibble between A and (HL) ----
    if mnemonic in ('rld', 'rrd'):
        reads.update(('a', 'h', 'l'))
        writes.update(('a', 'f'))
        return reads, writes

    # ---- Rotate/shift CB prefix: RLC/RRC/RL/RR/SLA/SRA/SRL ----
    if mnemonic in ('rlc', 'rrc', 'rl', 'rr', 'sla', 'sra', 'srl'):
        if ops:
            op = ops[-1]
            if op.kind == 'reg8':
                reads.add(op.value)
                writes.add(op.value)
            else:
                add_op_reads(op)
        writes.add('f')
        if mnemonic in ('rl', 'rr'):
            reads.add('f')
        return reads, writes

    # ---- BIT/SET/RES ----
    if mnemonic == 'bit':
        if len(ops) >= 2:
            add_op_reads(ops[-1])
        writes.add('f')
        return reads, writes

    if mnemonic in ('set', 'res'):
        if len(ops) >= 2:
            op = ops[-1]
            if op.kind == 'reg8':
                reads.add(op.value)
                writes.add(op.value)
            else:
                add_op_reads(op)
        return reads, writes

    # ---- Block operations ----
    if mnemonic in ('ldi', 'ldd', 'ldir', 'lddr'):
        reads.update(('b', 'c', 'd', 'e', 'h', 'l'))
        writes.update(('b', 'c', 'd', 'e', 'h', 'l', 'f'))
        return reads, writes

    if mnemonic in ('cpi', 'cpd', 'cpir', 'cpdr'):
        reads.update(('a', 'b', 'c', 'h', 'l'))
        writes.update(('b', 'c', 'h', 'l', 'f'))
        return reads, writes

    # ---- Exchange ----
    if mnemonic == 'ex':
        if len(ops) == 2:
            a, b = ops
            if a.kind == 'reg16' and a.value == 'af':
                # EX AF, AF' -- tested on the first operand only, because
                # the shadow operand "af'" may not classify as a register.
                reads.update(('a', 'f'))
                writes.update(('a', 'f'))
            elif a.kind == 'reg16' and b.kind == 'reg16':
                if a.value == 'de' and b.value == 'hl':
                    reads.update(('d', 'e', 'h', 'l'))
                    writes.update(('d', 'e', 'h', 'l'))
            elif a.kind == 'indirect_pair' and a.value == 'sp':
                # EX (SP), HL / IX / IY
                reads.add('sp')
                if b.kind == 'reg16':
                    regs = _pair_regs(b.value)
                    reads.update(regs)
                    writes.update(regs)
        return reads, writes

    if mnemonic == 'exx':
        reads.update(('b', 'c', 'd', 'e', 'h', 'l'))
        writes.update(('b', 'c', 'd', 'e', 'h', 'l'))
        return reads, writes

    # ---- I/O ----
    if mnemonic == 'in':
        if len(ops) == 2:
            dst, src = ops
            if src.kind == 'port_c':
                # IN r,(C): port address from BC, flags are updated
                reads.update(('b', 'c'))
                writes.add('f')
            # IN A,(n) leaves the flags untouched, so F is not written.
            if dst.kind == 'reg8':
                writes.add(dst.value)
        return reads, writes

    if mnemonic == 'out':
        if len(ops) == 2:
            dst, src = ops
            if dst.kind == 'port_c':
                reads.update(('b', 'c'))
            add_op_reads(src)
        return reads, writes

    if mnemonic in ('ini', 'ind', 'inir', 'indr',
                    'outi', 'outd', 'otir', 'otdr'):
        reads.update(('b', 'c', 'h', 'l'))
        writes.update(('b', 'h', 'l', 'f'))
        return reads, writes

    # ---- Control flow ----
    if mnemonic in ('jp', 'jr'):
        if ops:
            target = ops[0]
            if target.kind == 'indirect_pair' and target.value == 'hl':
                reads.update(('h', 'l'))
            elif target.kind == 'indexed':
                # JP (IX) / JP (IY)
                reads.update(_pair_regs(target.inner))
            elif target.kind == 'cond':
                reads.add('f')
        return reads, writes

    if mnemonic == 'call':
        if ops and ops[0].kind == 'cond':
            reads.add('f')
        reads.add('sp')
        writes.add('sp')
        return reads, writes

    if mnemonic in ('ret', 'reti', 'retn'):
        reads.add('sp')
        writes.add('sp')
        if ops and ops[0].kind == 'cond':
            reads.add('f')
        return reads, writes

    if mnemonic == 'djnz':
        # DJNZ decrements B and branches; it does not modify any flag, so
        # F must not be reported as written (that would hide a clobbered F).
        reads.add('b')
        writes.add('b')
        return reads, writes

    if mnemonic == 'rst':
        reads.add('sp')
        writes.add('sp')
        return reads, writes

    # ---- Misc ----
    if mnemonic == 'daa':
        reads.update(('a', 'f'))
        writes.update(('a', 'f'))
        return reads, writes

    if mnemonic in ('cpl', 'neg'):
        reads.add('a')
        writes.update(('a', 'f'))
        return reads, writes

    if mnemonic == 'scf':
        writes.add('f')
        return reads, writes

    if mnemonic == 'ccf':
        reads.add('f')
        writes.add('f')
        return reads, writes

    # nop / halt / ei / di / im and any unknown instruction: no tracked
    # register effects (empty sets are the safe default).
    return reads, writes
| # ------------------------------------------------------------------------- | |
| # Routine / header parsing | |
| # ------------------------------------------------------------------------- | |
# A non-local label at the start of a (stripped) line: "name:".
ROUTINE_LABEL_RE = re.compile(r'^([A-Za-z_]\w*)\s*:')

# Header annotations inside ';' comments; group 1 is the text after the colon.
CLOBBER_RE = re.compile(r';\s*Clobbers?:\s*(.+)', flags=re.IGNORECASE)
OUT_RE = re.compile(r';\s*Out:\s*(.+)', flags=re.IGNORECASE)

# "; Stack: <anything>" -- its presence alone is what matters.
STACK_RE = re.compile(r';\s*Stack\s*:\s*\S', flags=re.IGNORECASE)
def parse_annotation_regs(text):
    """Parse the register list of a header annotation.

    Example: 'AF, BC, DE' -> {'a', 'f', 'b', 'c', 'd', 'e'}

    Each comma-separated item has parenthetical notes and any "= value"
    description removed before it is matched.  Pair names (including IX,
    IY and SP) expand through ANNOT_PAIR_MAP; single registers map to
    their lowercase name; flag words map to 'f'.  Independently of the
    per-item parsing, flag-related phrases anywhere in the text add 'f'.

    Args:
        text: the text following "Clobbers:" / "Out:" (or a continuation
            line of such a block).

    Returns:
        Set of lowercase register names.
    """
    single_names = ('A', 'B', 'C', 'D', 'E', 'H', 'L', 'F')
    flag_words = ('Z', 'NZ', 'NC', 'CARRY', 'ZERO', 'FLAG', 'FLAGS')

    regs = set()
    for part in text.split(','):
        part = re.sub(r'\(.*?\)', '', part)   # strip parenthetical notes
        part = re.sub(r'=.*', '', part)       # strip "= value" descriptions
        part = part.strip().upper()
        if part in ANNOT_PAIR_MAP:
            # Covers AF/BC/DE/HL as well as IX, IY and SP.
            regs.update(ANNOT_PAIR_MAP[part])
        elif part in single_names:
            regs.add(part.lower())
        elif part in flag_words:
            regs.add('f')  # Flag conditions mean F is an output

    # Also check the raw text for flag-related keywords.  The "NZ ..."
    # spellings need no separate entries: each contains its "Z ..." form.
    upper = text.upper()
    if any(w in upper for w in ('Z =', 'CARRY', 'Z FLAG', 'Z SET')):
        regs.add('f')
    return regs
class Routine:
    """A routine found in a source file, with its header annotations."""

    __slots__ = (
        'name', 'filepath', 'start_lineno', 'lines',
        'stack_exempt', 'clobbers', 'out_regs',
    )

    def __init__(self, name, filepath, start_lineno):
        # Identity / location of the routine.
        self.name, self.filepath, self.start_lineno = name, filepath, start_lineno
        # Source lines belonging to the routine: [(lineno, raw_text), ...]
        self.lines = list()
        # Set when a "Stack:" annotation is seen.
        self.stack_exempt = False
        # Register names taken from "Clobbers:" and "Out:" annotations.
        self.clobbers, self.out_regs = set(), set()
def _apply_header_annotations(routine, header_lines):
    """Apply Stack:/Clobbers:/Out: annotations found in *header_lines*."""
    in_out_block = False
    for hline in header_lines:
        if STACK_RE.search(hline):
            routine.stack_exempt = True
        mc = CLOBBER_RE.search(hline)
        if mc:
            routine.clobbers = parse_annotation_regs(mc.group(1))
            in_out_block = False
        mo = OUT_RE.search(hline)
        if mo:
            routine.out_regs = parse_annotation_regs(mo.group(1))
            in_out_block = True
        elif in_out_block and hline.strip().startswith(';'):
            # A line that starts another annotation ends the Out: block and
            # must not be mined for registers itself.
            if any(kw in hline.upper() for kw in ('IN:', 'CLOBBERS:', 'STACK:')):
                in_out_block = False
            else:
                # Continuation of Out: block -- look for register/flag refs
                routine.out_regs |= parse_annotation_regs(hline.lstrip('; '))


def parse_file(filepath):
    """Parse a .asm file into a list of Routine objects.

    A routine starts at each non-local "name:" label and runs to the next
    one.  Its header is the comment/blank block before the label: lines
    buffered before the first routine, or the trailing comment/blank
    lines of the previous routine.  Clobbers:/Out:/Stack: annotations may
    also appear inside the body.

    Args:
        filepath: path of the file to read.

    Returns:
        List of Routine objects in file order; [] if the file cannot be
        opened (an error is printed to stderr in that case).
    """
    try:
        # Fixed encoding so results do not depend on the platform default;
        # undecodable bytes are replaced rather than raising.
        with open(filepath, encoding='utf-8', errors='replace') as f:
            raw = f.readlines()
    except OSError as e:
        print(f"ERROR: cannot open {filepath}: {e}", file=sys.stderr)
        return []

    routines = []
    current = None
    pending_cmts = []       # comment lines buffered before the first label
    pending_stack = False   # "Stack:" seen in comments not yet followed by code

    for lineno, line in enumerate(raw, 1):
        stripped = line.strip()

        # Blank or comment-only
        if not stripped or stripped.startswith(';'):
            if current is None:
                pending_cmts.append(line)
            else:
                current.lines.append((lineno, line))
                # Do not exempt the current routine yet: these comments may
                # turn out to be the header of the NEXT routine.
                if STACK_RE.search(line):
                    pending_stack = True
            continue

        # Non-local label: new routine (the pattern cannot match '.local:')
        m = ROUTINE_LABEL_RE.match(stripped)
        if m:
            header_lines = pending_cmts
            if current is not None:
                routines.append(current)
                # Also grab trailing comments from previous routine
                tail = []
                for _, prev_line in reversed(current.lines):
                    s = prev_line.strip()
                    if s and not s.startswith(';'):
                        break
                    tail.append(prev_line)
                header_lines = tail[::-1] + pending_cmts

            cur = Routine(m.group(1), filepath, lineno)
            _apply_header_annotations(cur, header_lines)
            cur.lines.append((lineno, line))
            pending_cmts = []
            pending_stack = False   # consumed by the new routine's header
            current = cur
            continue

        # Ordinary line
        pending_cmts = []
        if current is not None:
            if pending_stack:
                # The comment was followed by more code of this routine, so
                # the annotation belongs to it.
                current.stack_exempt = True
                pending_stack = False
            if not current.clobbers:
                mc = CLOBBER_RE.search(line)
                if mc:
                    current.clobbers = parse_annotation_regs(mc.group(1))
            if not current.out_regs:
                mo = OUT_RE.search(line)
                if mo:
                    current.out_regs = parse_annotation_regs(mo.group(1))
            if STACK_RE.search(line):
                current.stack_exempt = True
            current.lines.append((lineno, line))

    if current is not None:
        routines.append(current)
    return routines
| # ------------------------------------------------------------------------- | |
| # Parse instruction from line (shared helper) | |
| # ------------------------------------------------------------------------- | |
def parse_instruction(raw):
    """
    Turn a raw source line into (mnemonic, classified_ops, raw_ops).

    Lines that carry no mnemonic, and directive lines, yield
    (None, None, None).
    """
    fields = parse_line(raw)
    mnemonic, raw_ops, is_directive = fields[1], fields[3], fields[4]
    if is_directive or mnemonic is None:
        return None, None, None
    return mnemonic, classify_operands(mnemonic, raw_ops), raw_ops
| # ------------------------------------------------------------------------- | |
| # Check 1: Stack balance | |
| # ------------------------------------------------------------------------- | |
def check_stack_balance(routine):
    """Report RETs reached with a non-zero push/pop balance.

    The routine is walked top to bottom, counting +1 per PUSH and -1 per
    POP.  Once SP is loaded, incremented or decremented directly the
    balance can no longer be followed and no further RET is reported.
    Routines marked stack_exempt are skipped, as are RET lines carrying
    'lint: ignore'.

    Returns a list of dicts with keys 'lineno', 'code' and 'depth'.
    """
    if routine.stack_exempt:
        return []

    problems = []
    balance = 0
    untrackable = False   # set after a direct SP modification

    for lineno, text in routine.lines:
        body = text.strip()
        if body == '' or body[0] == ';':
            continue

        mnemonic, ops, _ = parse_instruction(text)
        if mnemonic is None:
            continue

        # LD SP,xx and INC/DEC SP: SP changes we cannot track linearly.
        first = ops[0] if ops else None
        targets_sp = (first is not None
                      and first.kind == 'reg16' and first.value == 'sp')
        if targets_sp and (mnemonic in ('inc', 'dec')
                           or (mnemonic == 'ld' and len(ops) == 2)):
            untrackable = True
            continue

        # EX (SP),xx changes the stacked value but not the depth, so it
        # simply falls through without touching the balance.
        if mnemonic == 'push':
            balance += 1
        elif mnemonic == 'pop':
            balance -= 1
        elif mnemonic in ('ret', 'reti', 'retn'):
            suppressed = 'lint: ignore' in text.lower()
            if balance != 0 and not untrackable and not suppressed:
                problems.append({
                    'lineno': lineno,
                    'code': strip_comment(body),
                    'depth': balance,
                })
    return problems
| # ------------------------------------------------------------------------- | |
| # Check 2: Register clobber after CALL | |
| # ------------------------------------------------------------------------- | |
# "call target" or "call cond, target" at the start of a line;
# group 1 captures the target name.
CALL_TARGET_RE = re.compile(r'^\s*call\s+(?:(?:z|nz|c|nc|p|m|pe|po)\s*,\s*)?(\w+)', flags=re.IGNORECASE)
def check_clobbers_in_file(filepath, all_lines, routine_map, max_lookahead=24):
    """
    Find reads of registers that a preceding CALL is declared to clobber.

    For each CALL whose target is in *routine_map* and has a non-empty
    effective clobber set (declared clobbers minus Out registers), scan
    forward through the following lines for reads of clobbered registers
    with no intervening write.  The scan stops at an unconditional
    jp/jr, a ret, the next call, a non-local label, or after
    *max_lookahead* lines.

    Args:
        filepath: file name copied into each issue.
        all_lines: list of raw source lines of that file.
        routine_map: mapping of routine name to an object with
            ``clobbers`` and ``out_regs`` sets.
        max_lookahead: number of lines after the CALL to inspect.

    Returns:
        List of dicts with keys 'file', 'call_line', 'use_line',
        'target', 'register' and 'code'.  Several registers read on the
        same line are reported in sorted order, so output is stable
        between runs.
    """
    issues = []
    total_lines = len(all_lines)

    for i, line in enumerate(all_lines):
        m = CALL_TARGET_RE.search(line)
        if not m:
            continue
        target = m.group(1)
        if target not in routine_map:
            continue
        callee = routine_map[target]

        # Effective clobbers = declared clobbers minus Out registers
        clobbered = callee.clobbers - callee.out_regs
        if not clobbered:
            continue

        call_lineno = i + 1
        restored = set()

        for j in range(i + 1, min(i + 1 + max_lookahead, total_lines)):
            fwd_raw = all_lines[j]
            fwd = fwd_raw.strip()
            if not fwd or fwd.startswith(';'):
                continue

            # The pattern only matches non-local labels (never '.name:').
            starts_routine = bool(ROUTINE_LABEL_RE.match(fwd))

            mnemonic, ops, _ = parse_instruction(fwd_raw)
            if mnemonic is None:
                # Could be a public label => end of basic block
                if starts_routine:
                    break
                continue

            inst_reads, inst_writes = get_effects(mnemonic, ops)

            # Check reads BEFORE updating the restored set: a write on this
            # line only restores the register for FUTURE lines.
            if 'lint: ignore' not in fwd_raw.lower():
                for reg in sorted((clobbered - restored) & inst_reads):
                    issues.append({
                        'file': filepath,
                        'call_line': call_lineno,
                        'use_line': j + 1,
                        'target': target,
                        'register': reg,
                        'code': fwd,
                    })
            restored.update(inst_writes)

            # End of basic block
            if mnemonic in ('jp', 'jr') and (not ops or ops[0].kind != 'cond'):
                break
            if mnemonic in ('ret', 'reti', 'retn'):
                break
            if CALL_TARGET_RE.match(fwd):
                break
            if starts_routine:
                break

    return issues
| # ------------------------------------------------------------------------- | |
| # Check 3: Dead stores in push/pop pairs | |
| # ------------------------------------------------------------------------- | |
def check_dead_stores(routine):
    """
    Detect writes to a sub-register between a push and its matching pop
    that are never read before the pop discards them.

    Only PUSH BC/DE/HL are examined: IX, IY and SP are skipped, and AF is
    skipped because of too many intentional save/restore patterns.  The
    forward scan gives up at a pop of a different pair, any ret, or an
    unconditional jp/jr.  A CALL or RST clears the pending writes, since
    the callee may have read them.  Writes on lines carrying
    'lint: ignore' are not reported.

    Returns a list of dicts with keys 'push_lineno', 'write_lineno',
    'pop_lineno', 'register' and 'pair'.
    """
    issues = []

    # Raw text by line number, for the 'lint: ignore' lookup at report time.
    raw_by_lineno = dict(routine.lines)

    # Build instruction list: (lineno, mnemonic | 'label', ops, raw_ops, flag)
    insns = []
    for lineno, raw in routine.lines:
        stripped = raw.strip()
        if not stripped or stripped.startswith(';'):
            continue
        mnemonic, ops, raw_ops = parse_instruction(raw)
        if mnemonic is None:
            # Could be a label-only line
            label, _, _, _, _, is_local = parse_line(raw)
            if label:
                insns.append((lineno, 'label', None, None, is_local))
            continue
        insns.append((lineno, mnemonic, ops, raw_ops, False))

    for i, (push_ln, mn, push_ops, _, _) in enumerate(insns):
        if mn != 'push' or not push_ops:
            continue
        if push_ops[0].kind != 'reg16':
            continue
        pair = push_ops[0].value
        if pair in ('ix', 'iy', 'sp', 'af'):
            continue

        sub_regs = set(PAIR_EXPAND[pair])
        written = {}   # reg -> lineno of the latest unread write
        depth = 1      # nesting of push/pop of this same pair

        for ln, jmn, jops, _, _ in insns[i + 1:]:
            if jmn == 'label':
                continue

            same_pair = (bool(jops) and jops[0].kind == 'reg16'
                         and jops[0].value == pair)

            # Nested push/pop tracking
            if jmn == 'push' and same_pair:
                depth += 1
                continue
            if jmn == 'pop' and same_pair:
                depth -= 1
                if depth == 0:
                    # Matching pop found: report unread writes
                    for reg, wln in written.items():
                        if 'lint: ignore' in raw_by_lineno.get(wln, '').lower():
                            continue
                        issues.append({
                            'push_lineno': push_ln,
                            'write_lineno': wln,
                            'pop_lineno': ln,
                            'register': reg,
                            'pair': pair,
                        })
                    break
                continue

            # Different pair popped: interleaved stacks, abort
            if jmn == 'pop':
                break

            # Return or unconditional branch: cannot track linearly
            if jmn in ('ret', 'reti', 'retn'):
                break
            if jmn in ('jp', 'jr') and jops and jops[0].kind != 'cond':
                break

            # CALL / RST: clear written set (callee may have read the values)
            if jmn in ('call', 'rst'):
                written.clear()
                continue

            inst_reads, inst_writes = get_effects(jmn, jops)

            # Check reads first: any read of a written reg clears the dead flag
            for reg in list(written):
                if reg in inst_reads:
                    del written[reg]

            # Track pure writes (written but not read by the same instruction)
            for reg in (sub_regs & inst_writes) - inst_reads:
                written[reg] = ln

    return issues
| # ------------------------------------------------------------------------- | |
| # Main | |
| # ------------------------------------------------------------------------- | |
def main():
    """Run all three checks and print a report.

    Files come from the command line; with no arguments the *.asm files
    under src/bios, src/bdos and src/ccp are scanned.

    Returns the process exit code: 0 = clean, 1 = issues found (or no
    source files found).
    """
    files = sys.argv[1:]
    if not files:
        for directory in ('src/bios', 'src/bdos', 'src/ccp'):
            base = Path(directory)
            if base.exists():
                files.extend(sorted(str(entry) for entry in base.glob('*.asm')))
    if not files:
        print("No source files found.", file=sys.stderr)
        return 1

    # Parse all files
    file_routines = {}
    all_routines = {}
    for fp in files:
        parsed = parse_file(fp)
        file_routines[fp] = parsed
        all_routines.update((r.name, r) for r in parsed)

    # ---- Check 1: Stack balance ----
    print("=== Stack Balance ===")
    stack_count = 0
    for fp in files:
        for routine in file_routines.get(fp, []):
            for iss in check_stack_balance(routine):
                d = iss['depth']
                words = abs(d)
                sign = '+' if d > 0 else ''
                plural = '' if words == 1 else 's'
                verdict = 'too many pushes' if d > 0 else 'too many pops'
                print(f" STACK {fp}:{iss['lineno']}: {routine.name}: "
                      f"ret with depth {sign}{d} ({words} word{plural} {verdict})"
                      f" [{iss['code']}]")
                stack_count += 1
    if not stack_count:
        print(" OK -- all routines balanced.")
    print()

    # ---- Check 2: Register clobber ----
    print("=== Register Clobber After CALL ===")
    clob_count = 0
    for fp in files:
        try:
            with open(fp, errors='replace') as f:
                lines = f.readlines()
        except OSError:
            continue
        # Per-file routine map: same-file definitions take priority
        merged = dict(all_routines)
        merged.update((r.name, r) for r in file_routines.get(fp, []))
        for iss in check_clobbers_in_file(fp, lines, merged):
            print(f" CLOBBER {iss['file']}:{iss['call_line']}: "
                  f"call {iss['target']} clobbers {iss['register']}, "
                  f"but line {iss['use_line']} reads it: {iss['code'].strip()}")
            clob_count += 1
    if not clob_count:
        print(" OK -- no clobber issues.")
    print()

    # ---- Check 3: Dead stores ----
    print("=== Dead Stores (push/write/pop without read) ===")
    dead_count = 0
    for fp in files:
        for routine in file_routines.get(fp, []):
            for iss in check_dead_stores(routine):
                print(f" DEAD {fp}:{iss['write_lineno']}: "
                      f"{routine.name}: write to {iss['register']} at line "
                      f"{iss['write_lineno']} is discarded by pop {iss['pair']} "
                      f"at line {iss['pop_lineno']} "
                      f"(pushed at line {iss['push_lineno']})")
                dead_count += 1
    if not dead_count:
        print(" OK -- no dead stores found.")
    print()

    total = stack_count + clob_count + dead_count
    if total:
        print(f"{total} issue(s) found.")
        return 1
    print("All checks passed.")
    return 0


if __name__ == '__main__':
    sys.exit(main())
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment