An emulator, assembler, and disassembler for the Sega Game Gear
You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.
 
 
 
 
 

502 lines
17 KiB

  1. #!/usr/bin/env python
  2. # -*- coding: utf-8 -*-
  3. # Copyright (C) 2014-2015 Ben Kurtovic <ben.kurtovic@gmail.com>
  4. # Released under the terms of the MIT License. See LICENSE for details.
  5. """
  6. This script generates 'src/assembler/instructions.inc.c' from
  7. 'src/assembler/instructions.yml'. It should be run automatically by make
  8. when the latter is modified, but can also be run manually.
  9. """
  10. from __future__ import print_function
  11. import re
  12. import time
# Path of the YAML file holding the instruction definitions (read by main()).
SOURCE = "src/assembler/instructions.yml"
# Path of the generated C file. main() also reads its current contents as the
# template whose autogen sections get replaced.
DEST = "src/assembler/instructions.inc.c"
# Text encoding used when reading SOURCE/DEST and when writing DEST.
ENCODING = "utf8"
# One level of indentation in the generated C code.
TAB = " " * 4

# PyYAML is a third-party dependency; if it is missing, print a hint on how to
# skip regenerating DEST and exit with status 1.
try:
    import yaml
except ImportError:
    print("Error: PyYAML is required (https://pypi.python.org/pypi/PyYAML)\n"
          "If you don't want to rebuild {0}, do:\n`make -t {0}`".format(DEST))
    exit(1)

# Matches an "@AUTOGEN_DATE" marker at the start of a line (re.M).
# Group 1 is the marker with its surrounding whitespace, group 2 the rest of
# the line (the previously written date).
re_date = re.compile(r"^(\s*@AUTOGEN_DATE\s*)(.*?)$", re.M)

# Matches everything between the instruction-block START/END marker comments.
# re.S lets "." span newlines. Group 1 is the start marker plus following
# newlines, group 2 the existing block contents, group 3 the preceding
# newlines plus the end marker.
re_inst = re.compile(
    r"(/\* @AUTOGEN_INST_BLOCK_START \*/\n*)(.*?)"
    r"(\n*/\* @AUTOGEN_INST_BLOCK_END \*/)", re.S)

# Same structure as re_inst, but for the lookup-block marker comments.
re_lookup = re.compile(
    r"(/\* @AUTOGEN_LOOKUP_BLOCK_START \*/\n*)(.*?)"
    r"(\n*/\* @AUTOGEN_LOOKUP_BLOCK_END \*/)", re.S)
  30. def _rindex(L, val):
  31. """
  32. Return the index of the last occurence of val in L.
  33. """
  34. return len(L) - L[::-1].index(val) - 1
  35. def _atoi(value):
  36. """
  37. Try to convert a string to an integer, supporting decimal and hexadecimal.
  38. """
  39. try:
  40. return int(value)
  41. except ValueError:
  42. return int(value, 16)
  43. def _is_call(call, func):
  44. """
  45. Return whether the first argument is a function call of the second.
  46. """
  47. return call.startswith(func + "(") and call.endswith(")")
  48. def _call_args(call):
  49. """
  50. Given a call and a function name, return the function call arguments.
  51. """
  52. return call[call.index("(") + 1:-1].strip()
  53. def _parse_step_args(call):
  54. """
  55. Parse arguments to a step function (e.g. reg() or cond()).
  56. """
  57. args = _call_args(call)
  58. if " " in args:
  59. return map(_atoi, args.split(" "))
  60. else:
  61. return _atoi(args), 1
  62. class ASMInstError(Exception):
  63. """
  64. Base class for all errors while trying to generate the instructions file.
  65. """
  66. class Instruction(object):
  67. """
  68. Represent a single ASM instruction mnemonic.
  69. """
  70. ARG_TYPES = {
  71. "register": "AT_REGISTER",
  72. "immediate": "AT_IMMEDIATE",
  73. "indirect": "AT_INDIRECT",
  74. "indexed": "AT_INDEXED",
  75. "condition": "AT_CONDITION",
  76. "port": "AT_PORT"
  77. }
  78. PSEUDO_TYPES = {
  79. "indirect_hl_or_indexed": ["AT_INDIRECT", "AT_INDEXED"]
  80. }
  81. REGISTER_OFFSETS = {
  82. "a": 7,
  83. "b": 0,
  84. "c": 1,
  85. "d": 2,
  86. "e": 3,
  87. "h": 4,
  88. "ixh": 4,
  89. "iyh": 4,
  90. "l": 5,
  91. "ixl": 5,
  92. "iyl": 5,
  93. "bc": 0,
  94. "de": 1,
  95. "hl": 2,
  96. "ix": 2,
  97. "iy": 2,
  98. "sp": 3,
  99. "af": 3
  100. }
  101. CONDITION_ORDER = ["nz", "z", "nc", "c", "po", "pe", "p", "m"]
  102. def __init__(self, name, data):
  103. self._name = name
  104. self._data = data
  105. self._has_optional_args = False
  106. def _get_arg_parse_mask(self, num):
  107. """
  108. Return the appropriate mask to parse_args() for the num-th argument.
  109. """
  110. types = set()
  111. optional = False
  112. for case in self._data["cases"]:
  113. if num < len(case["type"]):
  114. atype = case["type"][num]
  115. if atype in self.ARG_TYPES:
  116. types.add(self.ARG_TYPES[atype])
  117. else:
  118. types.update(self.PSEUDO_TYPES[atype])
  119. else:
  120. optional = True
  121. if not types:
  122. return "AT_NONE"
  123. if optional:
  124. types.add("AT_OPTIONAL")
  125. self._has_optional_args = True
  126. return "|".join(sorted(types))
  127. def _handle_return(self, ret, indent=1):
  128. """
  129. Return code to handle an instruction return statement.
  130. """
  131. data = ", ".join("0x%02X" % byte if isinstance(byte, int) else byte
  132. for byte in ret)
  133. return TAB * indent + "INST_RETURN({0}, {1})".format(len(ret), data)
  134. def _build_case_type_check(self, args):
  135. """
  136. Return the test part of an if statement for an instruction case.
  137. """
  138. conds = ["INST_TYPE({0}) == {1}".format(i, self.ARG_TYPES[cond])
  139. for i, cond in enumerate(args)]
  140. check = " && ".join(conds)
  141. if self._has_optional_args:
  142. return "INST_NARGS == {0} && ".format(len(args)) + check
  143. return check
  144. def _build_register_check(self, num, cond):
  145. """
  146. Return an expression to check for a particular register value.
  147. """
  148. return "INST_REG({0}) == REG_{1}".format(num, cond.upper())
  149. def _build_immediate_check(self, num, cond):
  150. """
  151. Return an expression to check for a particular immediate value.
  152. """
  153. if "." in cond:
  154. itype, value = cond.split(".", 1)
  155. vtype = "sval" if itype.upper() in ["S8", "REL"] else "uval"
  156. test1 = "INST_IMM({0}).mask & IMM_{1}".format(num, itype.upper())
  157. if (itype.upper() == "U16"):
  158. test1 += " && !INST_IMM({0}).is_label".format(num)
  159. test2 = "INST_IMM({0}).{1} == {2}".format(num, vtype, _atoi(value))
  160. return "({0} && {1})".format(test1, test2)
  161. return "INST_IMM({0}).mask & IMM_{1}".format(num, cond.upper())
  162. def _build_indirect_check(self, num, cond):
  163. """
  164. Return an expression to check for a particular indirect value.
  165. """
  166. if cond.startswith("reg."):
  167. test1 = "INST_INDIRECT({0}).type == AT_REGISTER".format(num)
  168. test2 = "INST_INDIRECT({0}).addr.reg == REG_{1}".format(
  169. num, cond[len("reg."):].upper())
  170. return "({0} && {1})".format(test1, test2)
  171. if cond == "imm" or cond == "immediate":
  172. return "INST_INDIRECT({0}).type == AT_IMMEDIATE".format(num)
  173. err = "Unknown condition for indirect argument: {0}"
  174. raise ASMInstError(err.format(cond))
  175. def _build_indexed_check(self, num, cond):
  176. """
  177. Return an expression to check for a particular indexed value.
  178. """
  179. raise ASMInstError("The indexed arg type does not support conditions")
  180. def _build_condition_check(self, num, cond):
  181. """
  182. Return an expression to check for a particular condition value.
  183. """
  184. return "INST_COND({0}) == COND_{1}".format(num, cond.upper())
  185. def _build_port_check(self, num, cond):
  186. """
  187. Return an expression to check for a particular port value.
  188. """
  189. if cond == "reg" or cond == "reg.c":
  190. return "INST_PORT({0}).type == AT_REGISTER".format(num)
  191. if cond == "imm" or cond == "immediate":
  192. return "INST_PORT({0}).type == AT_IMMEDIATE".format(num)
  193. err = "Unknown condition for port argument: {0}"
  194. raise ASMInstError(err.format(cond))
  195. _SUBCASE_LOOKUP_TABLE = {
  196. "register": _build_register_check,
  197. "immediate": _build_immediate_check,
  198. "indirect": _build_indirect_check,
  199. "indexed": _build_indexed_check,
  200. "condition": _build_condition_check,
  201. "port": _build_port_check
  202. }
  203. def _build_subcase_check(self, types, conds):
  204. """
  205. Return the test part of an if statement for an instruction subcase.
  206. """
  207. conds = [self._SUBCASE_LOOKUP_TABLE[types[i]](self, i, cond)
  208. for i, cond in enumerate(conds) if cond != "_"]
  209. return " && ".join(conds)
  210. def _iter_permutations(self, types, conds):
  211. """
  212. Iterate over all permutations of the given subcase conditions.
  213. """
  214. def split(typ, cond):
  215. if "|" in cond:
  216. splits = [split(typ, c) for c in cond.split("|")]
  217. merged = [choice for s in splits for choice in s]
  218. if len(merged) != len(set(merged)):
  219. msg = "Repeated conditions for {0}: {1}"
  220. raise ASMInstError(msg.format(typ, cond))
  221. return merged
  222. if typ == "register":
  223. if cond == "ixy":
  224. return ["ix", "iy"]
  225. if cond == "ih":
  226. return ["ixh", "iyh"]
  227. if cond == "il":
  228. return ["ixl", "iyl"]
  229. if typ == "indirect" and cond == "reg.ixy":
  230. return ["reg.ix", "reg.iy"]
  231. return [cond]
  232. splits = [split(typ, cond) for typ, cond in zip(types, conds)]
  233. num = max(len(cond) for cond in splits)
  234. if any(1 < len(cond) < num for cond in splits):
  235. msg = "Invalid condition permutations: {0}"
  236. raise ASMInstError(msg.format(conds))
  237. choices = [cond * num if len(cond) == 1 else cond for cond in splits]
  238. return zip(*choices)
  239. def _adapt_return(self, types, conds, ret):
  240. """
  241. Return a modified byte list to accomodate for prefixes and immediates.
  242. """
  243. def handle_reg_func(call):
  244. base, stride = _parse_step_args(call)
  245. index = _rindex(types, "register")
  246. return base + self.REGISTER_OFFSETS[conds[index]] * stride
  247. def handle_index(which):
  248. prefix = "INST_I{0}_PREFIX".format(which.upper())
  249. if ret[0] != prefix:
  250. ret.insert(0, prefix)
  251. ret = ret[:]
  252. for i, byte in enumerate(ret):
  253. if not isinstance(byte, int):
  254. if byte == "u8":
  255. try:
  256. index = types.index("immediate")
  257. imm = "INST_IMM({0})".format(index)
  258. except ValueError:
  259. index = types.index("port")
  260. imm = "INST_PORT({0}).port.imm".format(index)
  261. ret[i] = imm + ".uval"
  262. elif byte == "u16":
  263. if i < len(ret) - 1:
  264. raise ASMInstError("U16 return byte must be last")
  265. try:
  266. index = types.index("immediate")
  267. imm = "INST_IMM({0})".format(index)
  268. except ValueError:
  269. indir = types.index("indirect")
  270. if not conds[indir].startswith("imm"):
  271. msg = "Passing non-immediate indirect as immediate"
  272. raise ASMInstError(msg)
  273. imm = "INST_INDIRECT({0}).addr.imm".format(indir)
  274. ret[i] = "INST_IMM_U16_B1({0})".format(imm)
  275. ret.append("INST_IMM_U16_B2({0})".format(imm))
  276. break
  277. elif byte == "rel":
  278. index = types.index("immediate")
  279. ret[i] = "INST_IMM({0}).sval - 2".format(index)
  280. elif _is_call(byte, "bit"):
  281. index = types.index("immediate")
  282. base = _call_args(byte)
  283. if _is_call(base, "reg"):
  284. base = handle_reg_func(base)
  285. ret[i] = "0x{0:02X} + 8 * INST_IMM({1}).uval".format(
  286. _atoi(base), index)
  287. elif _is_call(byte, "rst"):
  288. index = types.index("immediate")
  289. base = _call_args(byte)
  290. ret[i] = "0x{0:02X} + INST_IMM({1}).uval".format(
  291. _atoi(base), index)
  292. elif _is_call(byte, "reg"):
  293. ret[i] = handle_reg_func(byte)
  294. elif _is_call(byte, "cond"):
  295. base, stride = _parse_step_args(byte)
  296. index = types.index("condition")
  297. offset = self.CONDITION_ORDER.index(conds[index])
  298. ret[i] = base + offset * stride
  299. else:
  300. msg = "Unsupported return byte: {0}"
  301. raise ASMInstError(msg.format(byte))
  302. for i, cond in enumerate(conds):
  303. if types[i] == "register" and cond.startswith(("ix", "iy")):
  304. handle_index(cond[1])
  305. elif types[i] == "indirect" and cond in ("reg.ix", "reg.iy"):
  306. handle_index(cond[5])
  307. elif types[i] == "indexed":
  308. ret.insert(0, "INST_INDEX_PREFIX({0})".format(i))
  309. ret.insert(2, "INST_INDEX({0}).offset".format(i))
  310. return ret
  311. def _handle_null_case(self, case):
  312. """
  313. Return code to handle an instruction case that takes no arguments.
  314. """
  315. return [
  316. TAB + "if (INST_NARGS == 0) {",
  317. self._handle_return(case["return"], 2),
  318. TAB + "}"
  319. ]
  320. def _handle_pseudo_case(self, pseudo, case):
  321. """
  322. Return code to handle an instruction pseudo-case.
  323. Pseudo-cases are cases that have pseudo-types as arguments. This means
  324. they are expanded to cover multiple "real" argument types.
  325. """
  326. index = case["type"].index(pseudo)
  327. if pseudo == "indirect_hl_or_indexed":
  328. case["type"][index] = "indexed"
  329. indexed = self._handle_case(case)
  330. case["type"][index] = "indirect"
  331. indirect = self._handle_case(case)
  332. base_cond = self._build_case_type_check(case["type"])
  333. hl_reg = TAB * 3 + self._build_indirect_check(index, "reg.hl")
  334. indirect[0] = TAB + "if ({0} &&\n{1}) {{".format(base_cond, hl_reg)
  335. return indirect + indexed
  336. raise ASMInstError("Unknown pseudo-type: {0}".format(pseudo))
  337. def _handle_case(self, case):
  338. """
  339. Return code to handle an instruction case.
  340. """
  341. ctype = case["type"]
  342. if not ctype:
  343. return self._handle_null_case(case)
  344. for pseudo in self.PSEUDO_TYPES:
  345. if pseudo in ctype:
  346. return self._handle_pseudo_case(pseudo, case)
  347. lines = []
  348. cond = self._build_case_type_check(ctype)
  349. lines.append(TAB + "if ({0}) {{".format(cond))
  350. subcases = [(perm, sub["return"]) for sub in case["cases"]
  351. for perm in self._iter_permutations(ctype, sub["if"])]
  352. for cond, ret in subcases:
  353. check = self._build_subcase_check(ctype, cond)
  354. ret = self._adapt_return(ctype, cond, ret)
  355. if check:
  356. lines.append(TAB * 2 + "if ({0})".format(check))
  357. lines.append(self._handle_return(ret, 3))
  358. else:
  359. lines.append(self._handle_return(ret, 2))
  360. break # Unconditional subcase
  361. else:
  362. lines.append(TAB * 2 + "INST_ERROR(ARG_VALUE)")
  363. lines.append(TAB + "}")
  364. return lines
  365. def render(self):
  366. """
  367. Convert data for an individual instruction into a C parse function.
  368. """
  369. lines = []
  370. if self._data["args"]:
  371. lines.append("{tab}INST_TAKES_ARGS(\n{tab2}{0},\n{tab2}{1},"
  372. "\n{tab2}{2}\n{tab})".format(
  373. self._get_arg_parse_mask(0), self._get_arg_parse_mask(1),
  374. self._get_arg_parse_mask(2), tab=TAB, tab2=TAB * 2))
  375. else:
  376. lines.append(TAB + "INST_TAKES_NO_ARGS")
  377. if "return" in self._data:
  378. lines.append(self._handle_return(self._data["return"]))
  379. elif "cases" in self._data:
  380. for case in self._data["cases"]:
  381. lines.extend(self._handle_case(case))
  382. lines.append(TAB + "INST_ERROR(ARG_TYPE)")
  383. else:
  384. msg = "Missing return or case block for {0} instruction"
  385. raise ASMInstError(msg.format(self._name))
  386. contents = "\n".join(lines)
  387. return "INST_FUNC({0})\n{{\n{1}\n}}".format(self._name, contents)
  388. def _build_inst_block(data):
  389. """
  390. Return the instruction parser block, given instruction data.
  391. """
  392. return "\n\n".join(
  393. Instruction(k, v).render() for k, v in sorted(data.items()))
  394. def _build_lookup_block(data):
  395. """
  396. Return the instruction lookup block, given instruction data.
  397. """
  398. macro = TAB + "HANDLE({0})"
  399. return "\n".join(macro.format(inst) for inst in sorted(data.keys()))
  400. def _process(template, data):
  401. """
  402. Return C code generated from a source template and instruction data.
  403. """
  404. inst_block = _build_inst_block(data)
  405. lookup_block = _build_lookup_block(data)
  406. date = time.asctime(time.gmtime())
  407. result = re_date.sub(r"\1{0} UTC".format(date), template)
  408. result = re_inst.sub(r"\1{0}\3".format(inst_block), result)
  409. result = re_lookup.sub(r"\1{0}\3".format(lookup_block), result)
  410. return result
  411. def main():
  412. """
  413. Main script entry point.
  414. """
  415. with open(SOURCE, "r") as fp:
  416. text = fp.read().decode(ENCODING)
  417. with open(DEST, "r") as fp:
  418. template = fp.read().decode(ENCODING)
  419. data = yaml.load(text)
  420. result = _process(template, data)
  421. with open(DEST, "w") as fp:
  422. fp.write(result.encode(ENCODING))
# Run the generator only when this file is executed as a script, not when it
# is imported as a module.
if __name__ == "__main__":
    main()