#!/usr/bin/env python
# -*- coding: utf-8 -*-

# Copyright (C) 2014-2015 Ben Kurtovic
# Released under the terms of the MIT License. See LICENSE for details.

"""
This script generates 'src/assembler/instructions.inc.c' from
'src/assembler/instructions.yml'. It should be run automatically by make when
the latter is modified, but can also be run manually.
"""

from __future__ import print_function

import io
import re
import time

SOURCE = "src/assembler/instructions.yml"
DEST = "src/assembler/instructions.inc.c"

ENCODING = "utf8"
TAB = " " * 4

# Patterns locating the auto-generated regions inside the DEST template.
# Group 1 is the leading marker, group 2 the old contents, and (for the two
# block patterns) group 3 the trailing marker.
re_date = re.compile(r"^(\s*@AUTOGEN_DATE\s*)(.*?)$", re.M)
re_inst = re.compile(
    r"(/\* @AUTOGEN_INST_BLOCK_START \*/\n*)(.*?)"
    r"(\n*/\* @AUTOGEN_INST_BLOCK_END \*/)", re.S)
re_lookup = re.compile(
    r"(/\* @AUTOGEN_LOOKUP_BLOCK_START \*/\n*)(.*?)"
    r"(\n*/\* @AUTOGEN_LOOKUP_BLOCK_END \*/)", re.S)


def _import_yaml():
    """
    Import and return the PyYAML module, exiting with status 1 if missing.

    The import is deferred until the script actually runs so that this module
    can be imported without PyYAML installed; main() calls this before doing
    any file I/O, so a missing dependency is still reported up front.
    """
    try:
        import yaml
    except ImportError:
        print("Error: PyYAML is required (https://pypi.python.org/pypi/PyYAML)\n"
              "If you don't want to rebuild {0}, do:\n`make -t {0}`".format(DEST))
        raise SystemExit(1)
    return yaml


def _rindex(L, val):
    """
    Return the index of the last occurrence of val in L.

    Raises ValueError if val is not in L.
    """
    return len(L) - L[::-1].index(val) - 1


def _atoi(value):
    """
    Try to convert a string to an integer, supporting decimal and hexadecimal.

    Decimal is attempted first, so "10" is ten; anything that is not valid
    decimal (e.g. "0x80" or "FF") is parsed as base 16.
    """
    try:
        return int(value)
    except ValueError:
        return int(value, 16)


def _is_call(call, func):
    """
    Return whether the first argument is a function call of the second.
    """
    return call.startswith(func + "(") and call.endswith(")")


def _call_args(call, func):
    """
    Given a call and a function name, return the function call arguments.

    The result is the text between the parentheses, stripped of surrounding
    whitespace; the caller is expected to have checked _is_call() first.
    """
    return call[len(func) + 1:-1].strip()


def _parse_step_args(call, func):
    """
    Parse arguments to a step function (e.g. reg() or cond()).

    Returns a tuple of integers. With a single argument, the result is
    (base, 1); with space-separated arguments, each one is converted.
    """
    args = _call_args(call, func)
    if " " in args:
        # Always hand back a tuple so both branches have the same type.
        return tuple(_atoi(arg) for arg in args.split(" "))
    return _atoi(args), 1


class ASMInstError(Exception):
    """
    Base class for all errors while trying to generate the instructions file.
    """


class Instruction(object):
    """
    Represent a single ASM instruction mnemonic.
    """
    # Maps argument type names used in the instruction data to C constants.
    ARG_TYPES = {
        "register": "AT_REGISTER",
        "immediate": "AT_IMMEDIATE",
        "indirect": "AT_INDIRECT",
        "indexed": "AT_INDEXED",
        "condition": "AT_CONDITION",
        "port": "AT_PORT"
    }
    # Pseudo-types expand to several real argument types.
    PSEUDO_TYPES = {
        "indirect_hl_or_indexed": ["AT_INDIRECT", "AT_INDEXED"]
    }
    # Multiplied by the stride of reg() and added to its base value.
    REGISTER_OFFSETS = {
        "a": 7, "b": 0, "c": 1, "d": 2, "e": 3, "h": 4, "ixh": 4, "iyh": 4,
        "l": 5, "ixl": 5, "iyl": 5, "bc": 0, "de": 1, "hl": 2, "ix": 2,
        "iy": 2, "sp": 3
    }
    # A condition's position here is the offset applied by cond().
    CONDITION_ORDER = ["nz", "z", "nc", "c", "po", "pe", "p", "m"]

    def __init__(self, name, data):
        self._name = name
        self._data = data

        # Set by _get_arg_parse_mask() when some case omits an argument.
        self._has_optional_args = False

    def _get_arg_parse_mask(self, num):
        """
        Return the appropriate mask to parse_args() for the num-th argument.

        The mask is the "|"-joined, sorted set of type constants that any case
        accepts at position num, or "AT_NONE" if no case has such an argument.
        """
        types = set()
        optional = False
        for case in self._data["cases"]:
            if num < len(case["type"]):
                atype = case["type"][num]
                if atype in self.ARG_TYPES:
                    types.add(self.ARG_TYPES[atype])
                else:
                    types.update(self.PSEUDO_TYPES[atype])
            else:
                optional = True

        if not types:
            return "AT_NONE"
        if optional:
            types.add("AT_OPTIONAL")
            self._has_optional_args = True
        return "|".join(sorted(types))

    def _handle_return(self, ret, indent=1):
        """
        Return code to handle an instruction return statement.

        Integer bytes are rendered as two-digit hex literals; anything else
        is assumed to already be a C expression and is emitted verbatim.
        """
        data = ", ".join("0x%02X" % byte if isinstance(byte, int) else byte
                         for byte in ret)
        return TAB * indent + "INST_RETURN({0}, {1})".format(len(ret), data)

    def _build_case_type_check(self, args):
        """
        Return the test part of an if statement for an instruction case.
        """
        conds = ["INST_TYPE({0}) == {1}".format(i, self.ARG_TYPES[cond])
                 for i, cond in enumerate(args)]
        check = " && ".join(conds)

        # The argument count is only tested when some case leaves one out.
        if self._has_optional_args:
            return "INST_NARGS == {0} && ".format(len(args)) + check
        return check

    def _build_register_check(self, num, cond):
        """
        Return an expression to check for a particular register value.
        """
        return "INST_REG({0}) == REG_{1}".format(num, cond.upper())

    def _build_immediate_check(self, num, cond):
        """
        Return an expression to check for a particular immediate value.

        A condition of the form "type.value" tests both the immediate's mask
        and its exact value; a bare "type" tests only the mask.
        """
        if "." in cond:
            itype, value = cond.split(".", 1)
            # Signed types are compared through .sval, the rest via .uval.
            vtype = "sval" if itype.upper() in ["S8", "REL"] else "uval"

            test1 = "INST_IMM({0}).mask & IMM_{1}".format(num, itype.upper())
            if itype.upper() == "U16":
                test1 += " && !INST_IMM({0}).is_label".format(num)
            test2 = "INST_IMM({0}).{1} == {2}".format(num, vtype, _atoi(value))
            return "({0} && {1})".format(test1, test2)

        return "INST_IMM({0}).mask & IMM_{1}".format(num, cond.upper())

    def _build_indirect_check(self, num, cond):
        """
        Return an expression to check for a particular indirect value.

        Raises ASMInstError if the condition is not recognized.
        """
        if cond.startswith("reg."):
            test1 = "INST_INDIRECT({0}).type == AT_REGISTER".format(num)
            test2 = "INST_INDIRECT({0}).addr.reg == REG_{1}".format(
                num, cond[len("reg."):].upper())
            return "({0} && {1})".format(test1, test2)

        if cond == "imm" or cond == "immediate":
            return "INST_INDIRECT({0}).type == AT_IMMEDIATE".format(num)

        err = "Unknown condition for indirect argument: {0}"
        # Raised rather than returned: the caller joins the results as text.
        raise ASMInstError(err.format(cond))

    def _build_indexed_check(self, num, cond):
        """
        Return an expression to check for a particular indexed value.

        Always raises ASMInstError, as indexed arguments take no conditions.
        """
        raise ASMInstError("The indexed arg type does not support conditions")

    def _build_condition_check(self, num, cond):
        """
        Return an expression to check for a particular condition value.
        """
        return "INST_COND({0}) == COND_{1}".format(num, cond.upper())

    def _build_port_check(self, num, cond):
        """
        Return an expression to check for a particular port value.

        Raises ASMInstError if the condition is not recognized.
        """
        if cond == "reg" or cond == "reg.c":
            return "INST_PORT({0}).type == AT_REGISTER".format(num)
        if cond == "imm" or cond == "immediate":
            return "INST_PORT({0}).type == AT_IMMEDIATE".format(num)

        err = "Unknown condition for port argument: {0}"
        # Raised rather than returned: the caller joins the results as text.
        raise ASMInstError(err.format(cond))

    # Dispatches on argument type name; entries are plain functions, so the
    # instance is passed explicitly by _build_subcase_check().
    _SUBCASE_LOOKUP_TABLE = {
        "register": _build_register_check,
        "immediate": _build_immediate_check,
        "indirect": _build_indirect_check,
        "indexed": _build_indexed_check,
        "condition": _build_condition_check,
        "port": _build_port_check
    }

    def _build_subcase_check(self, types, conds):
        """
        Return the test part of an if statement for an instruction subcase.

        Conditions equal to "_" are wildcards and contribute no test, so the
        result is an empty string when every condition is a wildcard.
        """
        conds = [self._SUBCASE_LOOKUP_TABLE[types[i]](self, i, cond)
                 for i, cond in enumerate(conds) if cond != "_"]
        return " && ".join(conds)

    def _iter_permutations(self, types, conds):
        """
        Iterate over all permutations of the given subcase conditions.

        Each condition may list alternatives separated by "|"; the register
        shorthands "i", "ih" and "il" expand to their ix/iy variants. All
        multi-choice conditions must have the same number of alternatives,
        which are then walked in lockstep (not as a cross product).
        """
        def split(typ, cond):
            if "|" in cond:
                splits = [split(typ, c) for c in cond.split("|")]
                merged = [choice for s in splits for choice in s]
                if len(merged) != len(set(merged)):
                    msg = "Repeated conditions for {0}: {1}"
                    raise ASMInstError(msg.format(typ, cond))
                return merged
            if typ == "register":
                if cond == "i":
                    return ["ix", "iy"]
                if cond == "ih":
                    return ["ixh", "iyh"]
                if cond == "il":
                    return ["ixl", "iyl"]
            return [cond]

        splits = [split(typ, cond) for typ, cond in zip(types, conds)]
        num = max(len(cond) for cond in splits)

        if any(1 < len(cond) < num for cond in splits):
            msg = "Invalid condition permutations: {0}"
            raise ASMInstError(msg.format(conds))

        # Single-choice conditions are repeated to line up with the others.
        choices = [cond * num if len(cond) == 1 else cond for cond in splits]
        return zip(*choices)

    def _adapt_return(self, types, conds, ret):
        """
        Return a modified byte list to accommodate for prefixes and immediates.

        The input list is copied, not modified. Symbolic bytes ("u8", "u16",
        "rel", bit(), reg(), cond()) are replaced by C expressions or computed
        integers, then index-register prefixes and offsets are inserted.
        """
        def handle_reg_func(call):
            base, stride = _parse_step_args(call, "reg")
            # reg() refers to the last register argument of the case.
            index = _rindex(types, "register")
            return base + self.REGISTER_OFFSETS[conds[index]] * stride

        ret = ret[:]
        for i, byte in enumerate(ret):
            if not isinstance(byte, int):
                if byte == "u8":
                    try:
                        index = types.index("immediate")
                        imm = "INST_IMM({0})".format(index)
                    except ValueError:
                        index = types.index("port")
                        imm = "INST_PORT({0}).port.imm".format(index)
                    ret[i] = imm + ".uval"

                elif byte == "u16":
                    if i < len(ret) - 1:
                        raise ASMInstError("U16 return byte must be last")
                    try:
                        index = types.index("immediate")
                        imm = "INST_IMM({0})".format(index)
                    except ValueError:
                        indir = types.index("indirect")
                        if not conds[indir].startswith("imm"):
                            msg = "Passing non-immediate indirect as immediate"
                            raise ASMInstError(msg)
                        imm = "INST_INDIRECT({0}).addr.imm".format(indir)
                    ret[i] = "INST_IMM_U16_B1({0})".format(imm)
                    ret.append("INST_IMM_U16_B2({0})".format(imm))
                    # The list just grew while being iterated; u16 is last,
                    # so stop here.
                    break

                elif byte == "rel":
                    index = types.index("immediate")
                    ret[i] = "INST_IMM({0}).sval - 2".format(index)

                elif _is_call(byte, "bit"):
                    index = types.index("immediate")
                    base = _call_args(byte, "bit")
                    if _is_call(base, "reg"):
                        base = handle_reg_func(base)
                    ret[i] = "0x{0:02X} + 8 * INST_IMM({1}).uval".format(
                        _atoi(base), index)

                elif _is_call(byte, "reg"):
                    ret[i] = handle_reg_func(byte)

                elif _is_call(byte, "cond"):
                    base, stride = _parse_step_args(byte, "cond")
                    index = types.index("condition")
                    offset = self.CONDITION_ORDER.index(conds[index])
                    ret[i] = base + offset * stride

                else:
                    msg = "Unsupported return byte: {0}"
                    raise ASMInstError(msg.format(byte))

        for i, cond in enumerate(conds):
            if types[i] == "register" and cond[0] == "i":
                # The second letter of the register name picks the prefix.
                prefix = "INST_I{0}_PREFIX".format(cond[1].upper())
                if ret[0] != prefix:
                    ret.insert(0, prefix)
            elif types[i] == "indexed":
                ret.insert(0, "INST_INDEX_PREFIX({0})".format(i))
                ret.insert(2, "INST_INDEX({0}).offset".format(i))

        return ret

    def _handle_null_case(self, case):
        """
        Return code to handle an instruction case that takes no arguments.
        """
        return [
            TAB + "if (INST_NARGS == 0) {",
            self._handle_return(case["return"], 2),
            TAB + "}"
        ]

    def _handle_pseudo_case(self, pseudo, case):
        """
        Return code to handle an instruction pseudo-case.

        Pseudo-cases are cases that have pseudo-types as arguments. This means
        they are expanded to cover multiple "real" argument types.

        NOTE: case["type"] is modified in place; afterwards the pseudo-type
        entry is left as "indirect".
        """
        index = case["type"].index(pseudo)

        if pseudo == "indirect_hl_or_indexed":
            case["type"][index] = "indexed"
            indexed = self._handle_case(case)

            case["type"][index] = "indirect"
            indirect = self._handle_case(case)
            # Narrow the indirect variant's opening test to (hl) only.
            base_cond = self._build_case_type_check(case["type"])
            hl_reg = TAB * 3 + self._build_indirect_check(index, "reg.hl")
            indirect[0] = TAB + "if ({0} &&\n{1}) {{".format(base_cond, hl_reg)

            return indirect + indexed

        raise ASMInstError("Unknown pseudo-type: {0}".format(pseudo))

    def _handle_case(self, case):
        """
        Return code to handle an instruction case.

        The result is a list of C source lines: an if block testing the
        argument types, containing one test per subcase permutation.
        """
        ctype = case["type"]
        if not ctype:
            return self._handle_null_case(case)

        for pseudo in self.PSEUDO_TYPES:
            if pseudo in ctype:
                return self._handle_pseudo_case(pseudo, case)

        lines = []
        cond = self._build_case_type_check(ctype)
        lines.append(TAB + "if ({0}) {{".format(cond))

        subcases = [(perm, sub["return"]) for sub in case["cases"]
                    for perm in self._iter_permutations(ctype, sub["if"])]
        for cond, ret in subcases:
            check = self._build_subcase_check(ctype, cond)
            ret = self._adapt_return(ctype, cond, ret)
            if check:
                lines.append(TAB * 2 + "if ({0})".format(check))
                lines.append(self._handle_return(ret, 3))
            else:
                lines.append(self._handle_return(ret, 2))
                break  # Unconditional subcase
        else:
            # No subcase matched unconditionally, so fall through to an error.
            lines.append(TAB * 2 + "INST_ERROR(ARG_VALUE)")

        lines.append(TAB + "}")
        return lines

    def render(self):
        """
        Convert data for an individual instruction into a C parse function.

        Raises ASMInstError if the data has neither a return nor a case block.
        """
        lines = []

        if self._data["args"]:
            lines.append("{tab}INST_TAKES_ARGS(\n{tab2}{0},\n{tab2}{1},"
                         "\n{tab2}{2}\n{tab})".format(
                             self._get_arg_parse_mask(0),
                             self._get_arg_parse_mask(1),
                             self._get_arg_parse_mask(2),
                             tab=TAB, tab2=TAB * 2))
        else:
            lines.append(TAB + "INST_TAKES_NO_ARGS")

        if "return" in self._data:
            lines.append(self._handle_return(self._data["return"]))
        elif "cases" in self._data:
            for case in self._data["cases"]:
                lines.extend(self._handle_case(case))
            lines.append(TAB + "INST_ERROR(ARG_TYPE)")
        else:
            msg = "Missing return or case block for {0} instruction"
            raise ASMInstError(msg.format(self._name))

        contents = "\n".join(lines)
        return "INST_FUNC({0})\n{{\n{1}\n}}".format(self._name, contents)


def _build_inst_block(data):
    """
    Return the instruction parser block, given instruction data.

    Instructions are rendered in sorted order of their mnemonics.
    """
    return "\n\n".join(
        Instruction(k, v).render() for k, v in sorted(data.items()))


def _build_lookup_block(data):
    """
    Return the instruction lookup block, given instruction data.
    """
    macro = TAB + "HANDLE({0})"
    return "\n".join(macro.format(inst) for inst in sorted(data))


def _process(template, data):
    """
    Return C code generated from a source template and instruction data.

    The date line and the two marked blocks of the template are replaced;
    everything else, including the markers themselves, is kept.
    """
    inst_block = _build_inst_block(data)
    lookup_block = _build_lookup_block(data)
    date = time.asctime(time.gmtime())

    # Function replacements are used so that the generated text is inserted
    # literally; a string replacement would have its backslashes and group
    # references interpreted by re.sub().
    result = re_date.sub(
        lambda m: m.group(1) + "{0} UTC".format(date), template)
    result = re_inst.sub(
        lambda m: m.group(1) + inst_block + m.group(3), result)
    result = re_lookup.sub(
        lambda m: m.group(1) + lookup_block + m.group(3), result)
    return result


def main():
    """
    Main script entry point.

    Reads SOURCE and the existing DEST (which doubles as the template), then
    overwrites DEST with the regenerated code.
    """
    yaml = _import_yaml()

    # io.open() decodes and encodes the text on both Python 2 and Python 3.
    with io.open(SOURCE, "r", encoding=ENCODING) as fp:
        text = fp.read()
    with io.open(DEST, "r", encoding=ENCODING) as fp:
        template = fp.read()

    # safe_load() only builds plain Python objects, and unlike a bare load()
    # call it does not need an explicit Loader argument.
    data = yaml.safe_load(text)
    result = _process(template, data)

    with io.open(DEST, "w", encoding=ENCODING) as fp:
        fp.write(result)


if __name__ == "__main__":
    main()