damieng · April 17, 2026 15:14
diff --git a/z80lint.py b/z80lint.py
 #!/usr/bin/env python3
 """
 z80lint.py -- Z80 assembler linter for CP/M 3.1 / ZX Spectrum +3 project.

 Proper instruction parser with effects table, replacing the old regex linter.

 Checks:
  1. Stack balance: every RET must be reached with net push/pop depth of zero.
  2. Register clobber: reads of registers known clobbered by a preceding CALL
     without intervening restore.
  3. Dead stores: writes between push/pop that are discarded by the pop.

 Routine header format:
    ; routine_name - Description
    ;   In:       A = input
    ;   Out:      HL = result
    ;   Clobbers: AF, BC, DE
    ;   Stack:    modifies   <- suppresses stack balance check

 Suppression:
    Add '; lint: ignore' to any line to suppress warnings on that line.

 Usage:
  python3 tools/z80lint.py [file.asm ...]
  (default: scans src/bios/*.asm src/bdos/*.asm src/ccp/*.asm)

 Exit code: 0 = clean, 1 = issues found.
 """

 import re
 import sys
 from pathlib import Path

 # -------------------------------------------------------------------------
 # Register constants
 # -------------------------------------------------------------------------

 SINGLES = {'a', 'b', 'c', 'd', 'e', 'h', 'l', 'i', 'r'}
 PAIRS = {'bc', 'de', 'hl', 'sp', 'af', 'ix', 'iy'}
 CONDITIONS = {'z', 'nz', 'c', 'nc', 'p', 'm', 'pe', 'po'}

 PAIR_EXPAND = {
    'af': ('a', 'f'),
    'bc': ('b', 'c'),
    'de': ('d', 'e'),
    'hl': ('h', 'l'),
    'ix': ('ixh', 'ixl'),
    'iy': ('iyh', 'iyl'),
    'sp': ('sp',),
 }

 # For annotation parsing: map pair names to sub-registers we track
 ANNOT_PAIR_MAP = {
    'AF': {'a', 'f'},
    'BC': {'b', 'c'},
    'DE': {'d', 'e'},
    'HL': {'h', 'l'},
    'IX': {'ixh', 'ixl'},
    'IY': {'iyh', 'iyl'},
    'SP': {'sp'},
 }

 # -------------------------------------------------------------------------
 # Operand classifier
 # -------------------------------------------------------------------------

 class Op:
    """Classified operand."""
    __slots__ = ('kind', 'value', 'inner')
    # Kinds: 'reg8', 'reg16', 'indirect_pair', 'indexed', 'indirect_mem',
    #        'imm', 'cond', 'symbol', 'port_c', 'port_imm'

    def __init__(self, kind, value=None, inner=None):
        self.kind = kind
        self.value = value    # register name or condition code (lowercase)
        self.inner = inner    # for indexed: base pair

    def __repr__(self):
        if self.inner:
            return f"Op({self.kind}, {self.value}, {self.inner})"
        return f"Op({self.kind}, {self.value})"


 # Regex patterns for operand classification
 _RE_INDIRECT_PAIR = re.compile(r'^\((?:hl|bc|de|sp)\)$', re.I)
 _RE_INDEXED = re.compile(r'^\((ix|iy)\s*[+\-]', re.I)
 _RE_INDIRECT_MEM = re.compile(r'^\([^)]+\)$')
 _RE_PORT_C = re.compile(r'^\(c\)$', re.I)


 def classify_operand(s):
    """Classify a single operand string into an Op."""
    s = s.strip()
    low = s.lower()

    # Register pairs first (before checking singles, since 'c' is both)
    if low in PAIRS:
        return Op('reg16', low)

    # 8-bit registers
    if low in SINGLES:
        return Op('reg8', low)

    # IXH/IXL/IYH/IYL
    if low in ('ixh', 'ixl', 'iyh', 'iyl'):
        return Op('reg8', low)

    # (C) for I/O
    if _RE_PORT_C.match(low):
        return Op('port_c')

    # (HL), (BC), (DE), (SP) indirect
    if _RE_INDIRECT_PAIR.match(low):
        inner = low[1:-1]
        return Op('indirect_pair', inner)

    # (IX+d), (IY+d)
    m = _RE_INDEXED.match(low)
    if m:
        return Op('indexed', inner=m.group(1).lower())

    # Condition codes (excluding 'c' which is handled contextually)
    if low in CONDITIONS and low != 'c':
        return Op('cond', low)

    # (nn) - indirect memory
    if _RE_INDIRECT_MEM.match(s):
        return Op('indirect_mem')

    # Immediate/symbol
    return Op('imm')


 def classify_operands(mnemonic, raw_operands):
    """
    Classify operand list, handling the condition code ambiguity for 'c'.
    For jp/jr/call/ret, first operand might be a condition code.
    """
    ops = []
    for i, raw in enumerate(raw_operands):
        op = classify_operand(raw)

        # Handle condition code disambiguation
        if i == 0 and mnemonic in ('jp', 'jr', 'call', 'ret'):
            low = raw.strip().lower()
            if low in CONDITIONS:
                ops.append(Op('cond', low))
                continue
            # 'c' as condition code: only if there is a second operand (target)
            if low == 'c' and len(raw_operands) > 1:
                ops.append(Op('cond', low))
                continue

        ops.append(op)
    return ops


 # -------------------------------------------------------------------------
 # Assembly line parser
 # -------------------------------------------------------------------------

 # Directives we skip (not instructions)
 DIRECTIVES = {
    'org', 'equ', 'ds', 'dw', 'db', 'defs', 'defw', 'defb', 'defm',
    'phase', 'dephase', 'block', 'align', 'if', 'else', 'endif', 'ifdef',
    'ifndef', 'macro', 'endm', 'include', 'incbin', 'device', 'assert',
    'display', 'struct', 'ends', 'dup', 'edup', 'module', 'endmodule',
    'define', 'undefine', 'export', 'page', 'slot', 'size', 'lua',
    'endlua', 'end', 'output', 'outend', 'fpos', 'opt', 'byte', 'word',
    'savenex', 'savesna', 'savebin', 'savehob', 'emptytap', 'savetap',
    'shellexec', 'hex',
 }


 def strip_comment(line):
    """Strip ; comment, respecting quoted strings."""
    result = []
    in_str = False
    q = None
    for ch in line:
        if not in_str and ch in ('"', "'"):
            in_str, q = True, ch
        elif in_str and ch == q:
            in_str = False
        elif not in_str and ch == ';':
            break
        result.append(ch)
    return ''.join(result).strip()


 def split_operands(text):
    """Split operand string on commas, respecting parentheses."""
    ops = []
    depth = 0
    current = []
    for ch in text:
        if ch == '(':
            depth += 1
        elif ch == ')':
            depth -= 1
        elif ch == ',' and depth == 0:
            ops.append(''.join(current).strip())
            current = []
            continue
        current.append(ch)
    if current:
        s = ''.join(current).strip()
        if s:
            ops.append(s)
    return ops


 def parse_line(raw):
    """
    Parse a single assembly line.
    Returns (label, mnemonic, operands_str, operands_list, is_directive, is_local)
    where label may be None, mnemonic may be None.
    """
    code = strip_comment(raw)
    if not code:
        return None, None, '', [], False, False

    label = None
    is_local = False

    # Check for label (with colon)
    m = re.match(r'^(\.?\w+)\s*:', code)
    if m:
        label = m.group(1)
        is_local = label.startswith('.')
        code = code[m.end():].strip()

    if not code:
        return label, None, '', [], False, is_local

    # Split mnemonic and operands
    parts = code.split(None, 1)
    mnemonic = parts[0].lower()

    # Handle sjasmplus label-without-colon followed by equ
    if len(parts) > 1 and parts[1].lower().startswith('equ'):
        return parts[0], 'equ', parts[1][3:].strip(), [], True, False

    if mnemonic in DIRECTIVES:
        return label, mnemonic, parts[1] if len(parts) > 1 else '', [], True, is_local

    operands_str = parts[1] if len(parts) > 1 else ''
    operands = split_operands(operands_str) if operands_str else []

    return label, mnemonic, operands_str, operands, False, is_local


 # -------------------------------------------------------------------------
 # Instruction effects resolver
 # -------------------------------------------------------------------------

 def _pair_regs(pair):
    """Expand pair name to set of sub-registers."""
    return set(PAIR_EXPAND.get(pair, (pair,)))


 def get_effects(mnemonic, ops):
    """
    Given a mnemonic and classified operands, return (reads, writes).
    Both are sets of lowercase register names: a, b, c, d, e, h, l, f,
    sp, ixh, ixl, iyh, iyl, i, r.
    """
    reads = set()
    writes = set()

    def add_op_reads(op):
        """Add registers read by accessing this operand."""
        if op.kind == 'reg8':
            reads.add(op.value)
        elif op.kind == 'reg16':
            reads.update(_pair_regs(op.value))
        elif op.kind == 'indirect_pair':
            reads.update(_pair_regs(op.value))
        elif op.kind == 'indexed':
            reads.update(_pair_regs(op.inner))
        elif op.kind == 'port_c':
            reads.update(('b', 'c'))

    def add_dst_writes(op):
        """Add register writes for a destination operand."""
        if op.kind == 'reg8':
            writes.add(op.value)
        elif op.kind == 'reg16':
            writes.update(_pair_regs(op.value))
        elif op.kind in ('indirect_pair', 'indexed'):
            # Writing to memory: the address register is READ not written
            add_op_reads(op)

    # ---- LD variants ----
    if mnemonic == 'ld':
        if len(ops) == 2:
            dst, src = ops

            if dst.kind == 'reg8':
                # LD r, src
                writes.add(dst.value)
                add_op_reads(src)
            elif dst.kind in ('indirect_pair', 'indexed'):
                # LD (HL), src / LD (IX+d), src
                add_dst_writes(dst)
                add_op_reads(src)
            elif dst.kind == 'reg16':
                if dst.value == 'sp' and src.kind == 'reg16':
                    # LD SP, HL/IX/IY
                    add_op_reads(src)
                    writes.add('sp')
                elif src.kind == 'indirect_mem':
                    # LD pair, (nn) - writes both sub-registers
                    writes.update(_pair_regs(dst.value))
                else:
                    # LD pair, nn / LD pair, pair
                    writes.update(_pair_regs(dst.value))
                    add_op_reads(src)
            elif dst.kind == 'indirect_mem':
                # LD (nn), A / LD (nn), pair
                add_op_reads(src)
        return reads, writes

    # ---- ALU: ADD/ADC/SUB/SBC ----
    if mnemonic in ('add', 'adc', 'sub', 'sbc'):
        if len(ops) == 2 and ops[0].kind == 'reg16':
            # 16-bit: ADD HL,pair / ADC HL,pair / SBC HL,pair
            pair_dst = ops[0].value
            reads.update(_pair_regs(pair_dst))
            add_op_reads(ops[1])
            writes.update(_pair_regs(pair_dst))
            writes.add('f')
            if mnemonic in ('adc', 'sbc'):
                reads.add('f')
            return reads, writes

        # 8-bit ALU
        if len(ops) == 2:
            # ADD A, x / ADC A, x / SBC A, x
            reads.add('a')
            add_op_reads(ops[1])
        elif len(ops) == 1:
            # SUB x (implicit A)
            reads.add('a')
            add_op_reads(ops[0])
        writes.add('a')
        writes.add('f')
        if mnemonic in ('adc', 'sbc'):
            reads.add('f')
        return reads, writes

    # ---- Logic: AND/OR/XOR/CP ----
    if mnemonic in ('and', 'or', 'xor', 'cp'):
        if len(ops) >= 1:
            src = ops[-1]
            if mnemonic == 'xor' and src.kind == 'reg8' and src.value == 'a':
                # XOR A: sets A=0, does NOT meaningfully read A
                writes.add('a')
                writes.add('f')
                return reads, writes
            if mnemonic == 'or' and src.kind == 'reg8' and src.value == 'a':
                # OR A: tests A, writes F only (A value unchanged)
                reads.add('a')
                writes.add('f')
                return reads, writes
            if mnemonic == 'cp':
                # CP x: reads A, reads x, writes F only
                reads.add('a')
                add_op_reads(src)
                writes.add('f')
                return reads, writes
            # AND/OR/XOR general
            reads.add('a')
            add_op_reads(src)
            writes.add('a')
            writes.add('f')
        return reads, writes

    # ---- INC/DEC ----
    if mnemonic in ('inc', 'dec'):
        if len(ops) == 1:
            op = ops[0]
            if op.kind == 'reg16':
                # INC/DEC pair: NO flags affected
                reads.update(_pair_regs(op.value))
                writes.update(_pair_regs(op.value))
            elif op.kind == 'reg8':
                # INC/DEC r: flags affected
                reads.add(op.value)
                writes.add(op.value)
                writes.add('f')
            elif op.kind in ('indirect_pair', 'indexed'):
                # INC/DEC (HL) / (IX+d): reads address, writes flags
                add_op_reads(op)
                writes.add('f')
        return reads, writes

    # ---- PUSH/POP ----
    if mnemonic == 'push':
        if ops and ops[0].kind == 'reg16':
            reads.update(_pair_regs(ops[0].value))
        return reads, writes

    if mnemonic == 'pop':
        if ops and ops[0].kind == 'reg16':
            writes.update(_pair_regs(ops[0].value))
        return reads, writes

    # ---- Rotate accumulator ----
    if mnemonic in ('rlca', 'rrca', 'rla', 'rra'):
        reads.add('a')
        writes.update(('a', 'f'))
        if mnemonic in ('rla', 'rra'):
            reads.add('f')
        return reads, writes

    # ---- Rotate/shift CB prefix: RLC/RRC/RL/RR/SLA/SRA/SRL ----
    if mnemonic in ('rlc', 'rrc', 'rl', 'rr', 'sla', 'sra', 'srl'):
        if ops:
            op = ops[-1]
            if op.kind == 'reg8':
                reads.add(op.value)
                writes.add(op.value)
            else:
                add_op_reads(op)
            writes.add('f')
            if mnemonic in ('rl', 'rr'):
                reads.add('f')
        return reads, writes

    # ---- BIT/SET/RES ----
    if mnemonic == 'bit':
        if len(ops) >= 2:
            add_op_reads(ops[-1])
            writes.add('f')
        return reads, writes

    if mnemonic in ('set', 'res'):
        if len(ops) >= 2:
            op = ops[-1]
            if op.kind == 'reg8':
                reads.add(op.value)
                writes.add(op.value)
            else:
                add_op_reads(op)
        return reads, writes

    # ---- Block operations ----
    if mnemonic in ('ldi', 'ldd'):
        reads.update(('b', 'c', 'd', 'e', 'h', 'l'))
        writes.update(('b', 'c', 'd', 'e', 'h', 'l', 'f'))
        return reads, writes

    if mnemonic in ('ldir', 'lddr'):
        reads.update(('b', 'c', 'd', 'e', 'h', 'l'))
        writes.update(('b', 'c', 'd', 'e', 'h', 'l', 'f'))
        return reads, writes

    if mnemonic in ('cpi', 'cpd'):
        reads.update(('a', 'b', 'c', 'h', 'l'))
        writes.update(('b', 'c', 'h', 'l', 'f'))
        return reads, writes

    if mnemonic in ('cpir', 'cpdr'):
        reads.update(('a', 'b', 'c', 'h', 'l'))
        writes.update(('b', 'c', 'h', 'l', 'f'))
        return reads, writes

    # ---- Exchange ----
    if mnemonic == 'ex':
        if len(ops) == 2:
            a, b = ops
            if a.kind == 'reg16' and b.kind == 'reg16':
                if a.value == 'de' and b.value == 'hl':
                    reads.update(('d', 'e', 'h', 'l'))
                    writes.update(('d', 'e', 'h', 'l'))
                elif a.value == 'af':
                    # EX AF, AF'
                    reads.update(('a', 'f'))
                    writes.update(('a', 'f'))
            elif a.kind == 'indirect_pair' and a.value == 'sp':
                # EX (SP), HL / IX / IY
                reads.add('sp')
                if b.kind == 'reg16':
                    reads.update(_pair_regs(b.value))
                    writes.update(_pair_regs(b.value))
        return reads, writes

    if mnemonic == 'exx':
        reads.update(('b', 'c', 'd', 'e', 'h', 'l'))
        writes.update(('b', 'c', 'd', 'e', 'h', 'l'))
        return reads, writes

    # ---- I/O ----
    if mnemonic == 'in':
        if len(ops) == 2:
            dst, src = ops
            if src.kind == 'port_c':
                reads.update(('b', 'c'))
            if dst.kind == 'reg8':
                writes.add(dst.value)
            writes.add('f')
        return reads, writes

    if mnemonic == 'out':
        if len(ops) == 2:
            dst, src = ops
            if dst.kind == 'port_c':
                reads.update(('b', 'c'))
            add_op_reads(src)
        return reads, writes

    if mnemonic in ('ini', 'ind', 'inir', 'indr'):
        reads.update(('b', 'c', 'h', 'l'))
        writes.update(('b', 'h', 'l', 'f'))
        return reads, writes

    if mnemonic in ('outi', 'outd', 'otir', 'otdr'):
        reads.update(('b', 'c', 'h', 'l'))
        writes.update(('b', 'h', 'l', 'f'))
        return reads, writes

    # ---- Control flow ----
    if mnemonic in ('jp', 'jr'):
        if ops and ops[0].kind == 'indirect_pair' and ops[0].value == 'hl':
            reads.update(('h', 'l'))
        if ops and ops[0].kind == 'cond':
            reads.add('f')
        return reads, writes

    if mnemonic == 'call':
        if ops and ops[0].kind == 'cond':
            reads.add('f')
        reads.add('sp')
        writes.add('sp')
        return reads, writes

    if mnemonic in ('ret', 'reti', 'retn'):
        reads.add('sp')
        writes.add('sp')
        if ops and ops[0].kind == 'cond':
            reads.add('f')
        return reads, writes

    if mnemonic == 'djnz':
        reads.add('b')
        writes.update(('b', 'f'))
        return reads, writes

    if mnemonic == 'rst':
        reads.add('sp')
        writes.add('sp')
        return reads, writes

    # ---- Misc ----
    if mnemonic == 'daa':
        reads.update(('a', 'f'))
        writes.update(('a', 'f'))
        return reads, writes

    if mnemonic == 'cpl':
        reads.add('a')
        writes.update(('a', 'f'))
        return reads, writes

    if mnemonic == 'neg':
        reads.add('a')
        writes.update(('a', 'f'))
        return reads, writes

    if mnemonic == 'scf':
        writes.add('f')
        return reads, writes

    if mnemonic == 'ccf':
        reads.add('f')
        writes.add('f')
        return reads, writes

    if mnemonic in ('nop', 'halt', 'ei', 'di'):
        return reads, writes

    if mnemonic == 'im':
        return reads, writes

    # Unknown instruction: return empty sets (safe default)
    return reads, writes


 # -------------------------------------------------------------------------
 # Routine / header parsing
 # -------------------------------------------------------------------------

 ROUTINE_LABEL_RE = re.compile(r'^([A-Za-z_]\w*)\s*:')
 CLOBBER_RE = re.compile(r';\s*Clobbers?:\s*(.+)', re.I)
 OUT_RE = re.compile(r';\s*Out:\s*(.+)', re.I)
 STACK_RE = re.compile(r';\s*Stack\s*:\s*\S', re.I)


 def parse_annotation_regs(text):
    """Parse register list from annotation: 'AF, BC, DE' -> {'a','f','b','c','d','e'}"""
    regs = set()
    for part in text.split(','):
        part = re.sub(r'\(.*?\)', '', part)  # strip parenthetical notes
        part = re.sub(r'=.*', '', part)       # strip "= value" descriptions
        part = part.strip().upper()
        if part in ANNOT_PAIR_MAP:
            regs.update(ANNOT_PAIR_MAP[part])
        elif part in ('A', 'B', 'C', 'D', 'E', 'H', 'L', 'F'):
            regs.add(part.lower())
        elif part in ('IX', 'IY'):
            regs.update(ANNOT_PAIR_MAP.get(part, set()))
        elif part == 'SP':
            regs.add('sp')
        elif part in ('Z', 'NZ', 'NC', 'CARRY', 'ZERO', 'FLAG', 'FLAGS'):
            regs.add('f')  # Flag conditions mean F is an output
    # Also check the raw text for flag-related keywords
    upper = text.upper()
    if any(w in upper for w in ('Z =', 'NZ =', 'CARRY', 'Z FLAG', 'NZ FLAG', 'Z SET', 'NZ SET')):
        regs.add('f')
    return regs


 class Routine:
    """Represents a parsed routine with header annotations."""
    __slots__ = ('name', 'filepath', 'start_lineno', 'lines',
                 'stack_exempt', 'clobbers', 'out_regs')

    def __init__(self, name, filepath, start_lineno):
        self.name = name
        self.filepath = filepath
        self.start_lineno = start_lineno
        self.lines = []           # [(lineno, raw_text), ...]
        self.stack_exempt = False
        self.clobbers = set()     # register names from Clobbers:
        self.out_regs = set()     # register names from Out:


 def parse_file(filepath):
    """Parse a .asm file into a list of Routine objects."""
    try:
        with open(filepath, errors='replace') as f:
            raw = f.readlines()
    except OSError as e:
        print(f"ERROR: cannot open {filepath}: {e}", file=sys.stderr)
        return []

    routines = []
    current = None
    pending_cmts = []  # comment lines buffered before next label

    for lineno, line in enumerate(raw, 1):
        stripped = line.strip()

        # Blank or comment-only
        if not stripped or stripped.startswith(';'):
            if current is None:
                pending_cmts.append(line)
            else:
                current.lines.append((lineno, line))
                if STACK_RE.search(line):
                    current.stack_exempt = True
            continue

        # Non-local label: new routine
        m = ROUTINE_LABEL_RE.match(stripped)
        if m and not stripped.startswith('.'):
            if current is not None:
                routines.append(current)

            name = m.group(1)
            cur = Routine(name, filepath, lineno)

            # Scan header comment block
            header_lines = pending_cmts
            # Also grab trailing comments from previous routine
            if current is not None:
                tail = []
                for _, raw_line in reversed(current.lines):
                    s = raw_line.strip()
                    if not s or s.startswith(';'):
                        tail.append(raw_line)
                    else:
                        break
                header_lines = list(reversed(tail)) + pending_cmts

            in_out_block = False
            for hline in header_lines:
                if STACK_RE.search(hline):
                    cur.stack_exempt = True
                mc = CLOBBER_RE.search(hline)
                if mc:
                    cur.clobbers = parse_annotation_regs(mc.group(1))
                    in_out_block = False
                mo = OUT_RE.search(hline)
                if mo:
                    cur.out_regs = parse_annotation_regs(mo.group(1))
                    in_out_block = True
                elif in_out_block and hline.strip().startswith(';'):
                    # Continuation of Out: block — look for register/flag refs
                    extra = parse_annotation_regs(hline.lstrip('; '))
                    cur.out_regs |= extra
                    # Stop if we hit another annotation keyword
                    if any(kw in hline.upper() for kw in ('IN:', 'CLOBBERS:', 'STACK:')):
                        in_out_block = False

            cur.lines.append((lineno, line))
            pending_cmts = []
            current = cur
            continue

        # Ordinary line
        pending_cmts = []
        if current is not None:
            if not current.clobbers:
                mc = CLOBBER_RE.search(line)
                if mc:
                    current.clobbers = parse_annotation_regs(mc.group(1))
            if not current.out_regs:
                mo = OUT_RE.search(line)
                if mo:
                    current.out_regs = parse_annotation_regs(mo.group(1))
            if STACK_RE.search(line):
                current.stack_exempt = True
            current.lines.append((lineno, line))

    if current is not None:
        routines.append(current)

    return routines


 # -------------------------------------------------------------------------
 # Parse instruction from line (shared helper)
 # -------------------------------------------------------------------------

 def parse_instruction(raw):
    """
    Parse a raw line into (mnemonic, classified_ops, raw_ops) or
    (None, None, None) if not an instruction.
    """
    label, mnemonic, operands_str, operands_raw, is_directive, is_local = parse_line(raw)
    if mnemonic is None or is_directive:
        return None, None, None
    ops = classify_operands(mnemonic, operands_raw)
    return mnemonic, ops, operands_raw


 # -------------------------------------------------------------------------
 # Check 1: Stack balance
 # -------------------------------------------------------------------------

 def check_stack_balance(routine):
    """Check that every RET path has balanced push/pop."""
    if routine.stack_exempt:
        return []

    issues = []
    depth = 0
    sp_complex = False

    for lineno, raw in routine.lines:
        stripped = raw.strip()
        if not stripped or stripped.startswith(';'):
            continue

        mnemonic, ops, _ = parse_instruction(raw)
        if mnemonic is None:
            continue

        # SP modifications we cannot track linearly
        if mnemonic == 'ld' and ops and len(ops) == 2:
            if ops[0].kind == 'reg16' and ops[0].value == 'sp':
                sp_complex = True
                continue
        if mnemonic in ('inc', 'dec') and ops:
            if ops[0].kind == 'reg16' and ops[0].value == 'sp':
                sp_complex = True
                continue
        # EX (SP), xx modifies content but not depth
        if mnemonic == 'ex' and len(ops) == 2:
            if ops[0].kind == 'indirect_pair' and ops[0].value == 'sp':
                continue

        if mnemonic == 'push':
            depth += 1
        elif mnemonic == 'pop':
            depth -= 1
        elif mnemonic in ('ret', 'reti', 'retn'):
            if not sp_complex and depth != 0 and 'lint: ignore' not in raw.lower():
                issues.append({
                    'lineno': lineno,
                    'code': strip_comment(stripped),
                    'depth': depth,
                })

    return issues


 # -------------------------------------------------------------------------
 # Check 2: Register clobber after CALL
 # -------------------------------------------------------------------------

 CALL_TARGET_RE = re.compile(
    r'^\s*call\s+(?:(?:z|nz|c|nc|p|m|pe|po)\s*,\s*)?(\w+)', re.I
 )


 def check_clobbers_in_file(filepath, all_lines, routine_map):
    """
    For each CALL with a known clobber set, scan forward in the basic block
    for reads of clobbered registers without intervening writes.
    Out registers are excluded from the clobber set.
    """
    issues = []

    for i, line in enumerate(all_lines):
        m = CALL_TARGET_RE.search(line)
        if not m:
            continue
        target = m.group(1)
        if target not in routine_map:
            continue

        r = routine_map[target]
        # Effective clobbers = declared clobbers minus Out registers
        clobbered = r.clobbers - r.out_regs
        if not clobbered:
            continue

        call_lineno = i + 1
        restored = set()

        for j in range(i + 1, min(i + 25, len(all_lines))):
            fwd_raw = all_lines[j]
            fwd = fwd_raw.strip()
            if not fwd or fwd.startswith(';'):
                continue

            mnemonic, ops, _ = parse_instruction(fwd_raw)
            if mnemonic is None:
                # Could be a public label => end of basic block
                if ROUTINE_LABEL_RE.match(fwd) and not fwd.startswith('.'):
                    break
                continue

            # Get writes from this instruction to track restores
            _, inst_writes = get_effects(mnemonic, ops)

            # Check for reads of still-clobbered registers BEFORE updating
            # restored set (the write on THIS line restores for FUTURE lines)
            inst_reads, _ = get_effects(mnemonic, ops)
            for reg in (clobbered - restored) & inst_reads:
                # Skip if line has lint: ignore suppression
                if 'lint: ignore' in fwd_raw.lower():
                    continue
                issues.append({
                    'file': filepath,
                    'call_line': call_lineno,
                    'use_line': j + 1,
                    'target': target,
                    'register': reg,
                    'code': fwd,
                })

            restored.update(inst_writes)

            # End of basic block
            if mnemonic in ('jp', 'jr') and (not ops or ops[0].kind != 'cond'):
                break
            if mnemonic in ('ret', 'reti', 'retn'):
                break
            if CALL_TARGET_RE.match(fwd):
                break
            if ROUTINE_LABEL_RE.match(fwd) and not fwd.startswith('.'):
                break

    return issues


 # -------------------------------------------------------------------------
 # Check 3: Dead stores in push/pop pairs
 # -------------------------------------------------------------------------

 def check_dead_stores(routine):
    """
    Detect writes to a sub-register between push and matching pop that are
    never read before the pop discards them.
    Skip PUSH AF (too many false positives).
    """
    issues = []

    # Build instruction list
    insns = []
    for lineno, raw in routine.lines:
        stripped = raw.strip()
        if not stripped or stripped.startswith(';'):
            continue
        mnemonic, ops, raw_ops = parse_instruction(raw)
        if mnemonic is None:
            # Could be a label-only line
            label, _, _, _, _, is_local = parse_line(raw)
            if label:
                insns.append((lineno, 'label', None, None, is_local))
            continue
        insns.append((lineno, mnemonic, ops, raw_ops, False))

    for i, (push_ln, mn, push_ops, _, _) in enumerate(insns):
        if mn != 'push' or not push_ops:
            continue
        if push_ops[0].kind != 'reg16':
            continue

        pair = push_ops[0].value
        if pair in ('ix', 'iy', 'sp'):
            continue
        # Skip AF: too many intentional save/restore patterns
        if pair == 'af':
            continue

        sub_regs = set(PAIR_EXPAND[pair])
        written = {}  # reg -> write_lineno

        depth = 1
        for j in range(i + 1, len(insns)):
            ln, jmn, jops, _, is_label = insns[j]

            if is_label or jmn == 'label':
                continue

            # Nested push/pop tracking
            if jmn == 'push' and jops and jops[0].kind == 'reg16' and jops[0].value == pair:
                depth += 1
                continue
            if jmn == 'pop' and jops and jops[0].kind == 'reg16' and jops[0].value == pair:
                depth -= 1
                if depth == 0:
                    # Matching pop found: report unread writes
                    for reg, wln in written.items():
                        # Check if the write line has lint: ignore
                        write_raw = routine.lines[wln - routine.lines[0][0]] if wln - routine.lines[0][0] < len(routine.lines) else ('', '')
                        if isinstance(write_raw, tuple) and len(write_raw) == 2:
                            write_raw = write_raw[1]
                        if 'lint: ignore' in str(write_raw).lower():
                            continue
                        issues.append({
                            'push_lineno': push_ln,
                            'write_lineno': wln,
                            'pop_lineno': ln,
                            'register': reg,
                            'pair': pair,
                        })
                    break
                continue
            # Different pair popped: interleaved stacks, abort
            if jmn == 'pop':
                break

            # Unconditional branch or ret: cannot track linearly
            if jmn in ('ret', 'reti', 'retn'):
                break
            if jmn in ('jp', 'jr') and jops and jops[0].kind != 'cond':
                break

            # CALL: clear written set (callee may have read the values)
            if jmn == 'call':
                written.clear()
                continue

            # Get instruction effects
            inst_reads, inst_writes = get_effects(jmn, jops)

            # Check reads first: any read of a written reg clears the dead flag
            for reg in list(written.keys()):
                if reg in inst_reads:
                    del written[reg]

            # Track writes to sub-registers
            for reg in sub_regs & inst_writes:
                if reg not in inst_reads:
                    # Pure write: potentially dead
                    written[reg] = ln

    return issues


 # -------------------------------------------------------------------------
 # Main
 # -------------------------------------------------------------------------

 def main():
    if len(sys.argv) < 2:
        files = []
        for d in ['src/bios', 'src/bdos', 'src/ccp']:
            p = Path(d)
            if p.exists():
                files.extend(sorted(str(f) for f in p.glob('*.asm')))
    else:
        files = sys.argv[1:]

    if not files:
        print("No source files found.", file=sys.stderr)
        return 1

    # Parse all files
    all_routines = {}
    file_routines = {}

    for fp in files:
        routines = parse_file(fp)
        file_routines[fp] = routines
        for r in routines:
            all_routines[r.name] = r

    total = 0

    # ---- Check 1: Stack balance ----
    print("=== Stack Balance ===")
    stack_count = 0
    for fp in files:
        for routine in file_routines.get(fp, []):
            for iss in check_stack_balance(routine):
                d = iss['depth']
                sign = '+' if d > 0 else ''
                print(f"  STACK  {fp}:{iss['lineno']}: "
                      f"{routine.name}: ret with depth {sign}{d} "
                      f"({abs(d)} word{'s' if abs(d) != 1 else ''} "
                      f"{'too many pushes' if d > 0 else 'too many pops'})"
                      f"  [{iss['code']}]")
                stack_count += 1
    if stack_count == 0:
        print("  OK -- all routines balanced.")
    total += stack_count

    print()

    # ---- Check 2: Register clobber ----
    print("=== Register Clobber After CALL ===")
    clob_count = 0
    for fp in files:
        try:
            with open(fp, errors='replace') as f:
                lines = f.readlines()
        except OSError:
            continue
        # Build per-file routine map: same-file definitions take priority
        local_routines = {r.name: r for r in file_routines.get(fp, [])}
        merged = {**all_routines, **local_routines}
        for iss in check_clobbers_in_file(fp, lines, merged):
            print(f"  CLOBBER {iss['file']}:{iss['call_line']}: "
                  f"call {iss['target']} clobbers {iss['register']}, "
                  f"but line {iss['use_line']} reads it: {iss['code'].strip()}")
            clob_count += 1
    if clob_count == 0:
        print("  OK -- no clobber issues.")
    total += clob_count

    print()

    # ---- Check 3: Dead stores ----
    print("=== Dead Stores (push/write/pop without read) ===")
    dead_count = 0
    for fp in files:
        for routine in file_routines.get(fp, []):
            for iss in check_dead_stores(routine):
                print(f"  DEAD   {fp}:{iss['write_lineno']}: "
                      f"{routine.name}: write to {iss['register']} at line "
                      f"{iss['write_lineno']} is discarded by pop {iss['pair']} "
                      f"at line {iss['pop_lineno']} "
                      f"(pushed at line {iss['push_lineno']})")
                dead_count += 1
    if dead_count == 0:
        print("  OK -- no dead stores found.")
    total += dead_count

    print()
    if total:
        print(f"{total} issue(s) found.")
    else:
        print("All checks passed.")

    return 1 if total else 0


 if __name__ == '__main__':
    sys.exit(main())
No results found