An emulator, assembler, and disassembler for the Sega Game Gear
You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.
 
 
 
 
 

435 lines
13 KiB

  1. /* Copyright (C) 2014-2015 Ben Kurtovic <ben.kurtovic@gmail.com>
  2. Released under the terms of the MIT License. See LICENSE for details. */
  3. #include <stdlib.h>
  4. #include <string.h>
  5. #include "tokenizer.h"
  6. #include "directives.h"
  7. #include "instructions.h"
  8. #include "inst_args.h"
  9. #include "parse_util.h"
  10. #include "../logging.h"
  11. #include "../mmu.h"
  12. #include "../rom.h"
  13. /* Internal structs */
  14. typedef struct {
  15. size_t size;
  16. const ASMLine **overlap_table;
  17. const ASMLine **overlap_origins;
  18. const ASMLine *origin;
  19. uint8_t bank;
  20. bool cross_blocks;
  21. } ASMLayoutInfo;
  22. typedef struct {
  23. int8_t slots[MMU_NUM_ROM_BANKS];
  24. const ASMLine *lines[MMU_NUM_ROM_BANKS];
  25. } ASMSlotInfo;
  26. /* Sentinel values for overlap table */
  27. const ASMLine header_sentinel, bounds_sentinel;
  /*
      Typedef for parse_util data parser functions: given the argument text and
      its size, store an allocated byte buffer and its length through the first
      two parameters and return whether parsing succeeded.
  */
  typedef bool (*parser_func)(
      uint8_t **result, size_t *length, const char *arg, ssize_t size);
  30. /*
  31. Return the address of a given ROM offset when mapped into the given slot.
  32. */
  33. static inline uint16_t map_into_slot(size_t offset, int8_t slot)
  34. {
  35. return (slot * MMU_ROM_BANK_SIZE) + (offset & (MMU_ROM_BANK_SIZE - 1));
  36. }
  /*
      Return the default slot associated with a given memory bank.

      Banks 0, 1 and 2 map to the slot of the same number; every higher bank
      defaults to slot 2.
  */
  static inline int8_t default_bank_slot(uint8_t bank)
  {
      if (bank > 2) {
          return 2;
      }
      return bank;
  }
  44. /*
  45. Add a given line, representing a label, to the symbol table.
  46. Return NULL on success and an ErrorInfo object on failure (e.g. in the case
  47. of duplicate labels, or labels sharing names with registers/conditions).
  48. */
  49. static ErrorInfo* add_label_to_table(
  50. ASMSymbolTable *symtable, const ASMLine *line, size_t offset, int8_t slot)
  51. {
  52. if (line->length - 1 >= MAX_SYMBOL_SIZE)
  53. return error_info_create(line, ET_SYMBOL, ED_SYM_TOO_LONG);
  54. ASMArgRegister reg;
  55. if (argparse_register(&reg, line->data, line->length - 1))
  56. return error_info_create(line, ET_SYMBOL, ED_SYM_IS_REGISTER);
  57. ASMArgCondition cond;
  58. if (argparse_condition(&cond, line->data, line->length - 1))
  59. return error_info_create(line, ET_SYMBOL, ED_SYM_IS_CONDITION);
  60. char *symbol = strndup(line->data, line->length - 1);
  61. if (!symbol)
  62. OUT_OF_MEMORY()
  63. const ASMSymbol *current = asm_symtable_find(symtable, symbol);
  64. if (current) {
  65. ErrorInfo *ei = error_info_create(line, ET_SYMBOL, ED_SYM_DUPE_LABELS);
  66. error_info_append(ei, current->line);
  67. free(symbol);
  68. return ei;
  69. }
  70. ASMSymbol *label = malloc(sizeof(ASMSymbol));
  71. if (!label)
  72. OUT_OF_MEMORY()
  73. label->offset = map_into_slot(offset,
  74. (slot >= 0) ? slot : default_bank_slot(offset / MMU_ROM_BANK_SIZE));
  75. label->symbol = symbol;
  76. label->line = line;
  77. asm_symtable_insert(symtable, label);
  78. return NULL;
  79. }
  80. /*
  81. Handle an origin directive by updating the offset.
  82. Return NULL on success and an ErrorInfo object on failure.
  83. */
  84. static ErrorInfo* handle_origin_directive(const ASMLine *line, size_t *offset)
  85. {
  86. if (!DIRECTIVE_HAS_ARG(line, DIR_ORIGIN))
  87. return error_info_create(line, ET_PREPROC, ED_PP_NO_ARG);
  88. uint32_t arg;
  89. if (!dparse_uint32_t(&arg, line, DIR_ORIGIN))
  90. return error_info_create(line, ET_PREPROC, ED_PP_BAD_ARG);
  91. if (arg >= ROM_SIZE_MAX)
  92. return error_info_create(line, ET_PREPROC, ED_PP_ARG_RANGE);
  93. *offset = arg;
  94. return NULL;
  95. }
  96. /*
  97. Handle a block directive by updating the offset and slot.
  98. Return NULL on success and an ErrorInfo object on failure.
  99. */
  100. static ErrorInfo* handle_block_directive(
  101. const ASMLine *line, size_t *offset, ASMSlotInfo *si)
  102. {
  103. if (!DIRECTIVE_HAS_ARG(line, DIR_BLOCK))
  104. return error_info_create(line, ET_PREPROC, ED_PP_NO_ARG);
  105. uint8_t *args, bank, slot;
  106. size_t dir_offset = DIRECTIVE_OFFSET(line, DIR_BLOCK) + 1, nargs;
  107. if (!parse_bytes(&args, &nargs, line->data + dir_offset,
  108. line->length - dir_offset))
  109. return error_info_create(line, ET_PREPROC, ED_PP_BAD_ARG);
  110. if (nargs < 1 || nargs > 2)
  111. return free(args), error_info_create(line, ET_PREPROC, ED_PP_BAD_ARG);
  112. bank = args[0];
  113. slot = nargs == 2 ? args[1] : default_bank_slot(bank);
  114. free(args);
  115. if (bank >= MMU_NUM_ROM_BANKS || slot >= MMU_NUM_SLOTS)
  116. return error_info_create(line, ET_PREPROC, ED_PP_ARG_RANGE);
  117. if (bank == 0 && slot != 0)
  118. return error_info_create(line, ET_LAYOUT, ED_LYT_BLOCK0);
  119. if (si->slots[bank] >= 0 && si->slots[bank] != slot) {
  120. ErrorInfo *ei = error_info_create(line, ET_LAYOUT, ED_LYT_SLOTS);
  121. error_info_append(ei, si->lines[bank]);
  122. return ei;
  123. }
  124. *offset = bank * MMU_ROM_BANK_SIZE;
  125. si->slots[bank] = slot;
  126. if (!si->lines[bank])
  127. si->lines[bank] = line;
  128. return NULL;
  129. }
  130. /*
  131. Parse a .space directive, which fills a region with a single byte.
  132. */
  133. static bool parse_space(
  134. uint8_t **result, size_t *length, const char *arg, ssize_t size)
  135. {
  136. uint8_t *bytes;
  137. size_t nbytes;
  138. if (!parse_bytes(&bytes, &nbytes, arg, size))
  139. return false;
  140. if (nbytes < 1 || nbytes > 2) {
  141. free(bytes);
  142. return false;
  143. }
  144. *length = bytes[0];
  145. if (!(*result = malloc(sizeof(uint8_t) * (*length))))
  146. OUT_OF_MEMORY()
  147. memset(*result, nbytes == 2 ? bytes[1] : 0, *length);
  148. free(bytes);
  149. return true;
  150. }
  151. /*
  152. Parse data encoded in a line into an ASMData object.
  153. On success, return NULL and store the instruction in *data_ptr. On failure,
  154. return an ErrorInfo object; *data_ptr is not modified.
  155. */
  156. static ErrorInfo* parse_data(
  157. const ASMLine *line, ASMData **data_ptr, size_t offset)
  158. {
  159. const char *directive;
  160. parser_func parser = (parser_func) parse_string;
  161. if (IS_DIRECTIVE(line, DIR_BYTE)) {
  162. directive = DIR_BYTE;
  163. parser = parse_bytes;
  164. } else if (IS_DIRECTIVE(line, DIR_SPACE)) {
  165. directive = DIR_SPACE;
  166. parser = parse_space;
  167. } else if (IS_DIRECTIVE(line, DIR_ASCII)) {
  168. directive = DIR_ASCII;
  169. } else if (IS_DIRECTIVE(line, DIR_ASCIZ)) {
  170. directive = DIR_ASCIZ;
  171. } else if (IS_DIRECTIVE(line, DIR_ASCIIZ)) {
  172. directive = DIR_ASCIIZ;
  173. } else {
  174. return error_info_create(line, ET_PREPROC, ED_PP_UNKNOWN);
  175. }
  176. if (!DIRECTIVE_HAS_ARG(line, directive))
  177. return error_info_create(line, ET_PREPROC, ED_PP_NO_ARG);
  178. size_t dir_offset = DIRECTIVE_OFFSET(line, directive) + 1;
  179. const char *arg = line->data + dir_offset;
  180. size_t arglen = line->length - dir_offset;
  181. ASMData *data = malloc(sizeof(ASMData));
  182. if (!data)
  183. OUT_OF_MEMORY()
  184. data->loc.offset = offset;
  185. data->next = NULL;
  186. if (!parser(&data->bytes, &data->loc.length, arg, arglen)) {
  187. free(data);
  188. return error_info_create(line, ET_PREPROC, ED_PP_BAD_ARG);
  189. }
  190. *data_ptr = data;
  191. return NULL;
  192. }
  193. /*
  194. Parse an instruction encoded in a line into an ASMInstruction object.
  195. On success, return NULL and store the instruction in *inst_ptr. On failure,
  196. return an ErrorInfo object; *inst_ptr is not modified.
  197. */
  198. static ErrorInfo* parse_instruction(
  199. const ASMLine *line, ASMInstruction **inst_ptr, size_t offset)
  200. {
  201. char mnemonic[MAX_MNEMONIC_SIZE] = {0};
  202. size_t i = 0;
  203. while (i < line->length) {
  204. char c = line->data[i];
  205. if (c == ' ')
  206. break;
  207. if (i >= MAX_MNEMONIC_SIZE)
  208. return error_info_create(line, ET_PARSER, ED_PS_OP_TOO_LONG);
  209. if ((c < 'a' || c > 'z') && (c < '0' || c > '9'))
  210. return error_info_create(line, ET_PARSER, ED_PS_OP_INVALID);
  211. mnemonic[i++] = c;
  212. }
  213. if (i < MIN_MNEMONIC_SIZE)
  214. return error_info_create(line, ET_PARSER, ED_PS_OP_TOO_SHORT);
  215. if (i + 1 < line->length)
  216. i++; // Advance past space
  217. uint8_t *bytes;
  218. size_t arglen = line->length - i, length;
  219. char *argstart = arglen > 0 ? line->data + i : NULL, *symbol = NULL;
  220. ASMInstParser parser = get_inst_parser(mnemonic);
  221. if (!parser)
  222. return error_info_create(line, ET_PARSER, ED_PS_OP_UNKNOWN);
  223. ASMErrorDesc edesc = parser(&bytes, &length, &symbol, argstart, arglen);
  224. if (edesc != ED_NONE)
  225. return error_info_create(line, ET_PARSER, edesc);
  226. ASMInstruction *inst = malloc(sizeof(ASMInstruction));
  227. if (!inst)
  228. OUT_OF_MEMORY()
  229. inst->loc.offset = offset;
  230. inst->loc.length = length;
  231. inst->bytes = bytes;
  232. inst->symbol = symbol;
  233. inst->line = line;
  234. inst->next = NULL;
  235. *inst_ptr = inst;
  236. return NULL;
  237. }
  238. /*
  239. Check if the given object location is legal.
  240. Checks include ROM size bounding, overlapping with existing objects, and
  241. block-crossing assuming the .cross_blocks directive has not been specified.
  242. On success, return NULL and add the location to the overlap table.
  243. On failure, return an ErrorInfo object.
  244. */
  245. static ErrorInfo* check_layout(
  246. ASMLayoutInfo *li, const ASMLocation *loc, const ASMLine *line)
  247. {
  248. const ASMLine *clash = NULL, *clash_origin;
  249. if (loc->offset + loc->length > li->size) {
  250. clash = &bounds_sentinel;
  251. } else {
  252. for (size_t i = 0; i < loc->length; i++) {
  253. if (li->overlap_table[loc->offset + i]) {
  254. clash = li->overlap_table[loc->offset + i];
  255. clash_origin = li->overlap_origins[loc->offset + i];
  256. break;
  257. }
  258. }
  259. }
  260. if (clash) {
  261. ErrorInfo *ei = error_info_create(line, ET_LAYOUT,
  262. (clash == &header_sentinel) ? ED_LYT_OVERLAP_HEAD :
  263. (clash == &bounds_sentinel) ? ED_LYT_BOUNDS : ED_LYT_OVERLAP);
  264. if (li->origin)
  265. error_info_append(ei, li->origin);
  266. if (clash != &header_sentinel && clash != &bounds_sentinel) {
  267. error_info_append(ei, clash);
  268. if (clash_origin)
  269. error_info_append(ei, clash_origin);
  270. }
  271. return ei;
  272. }
  273. uint8_t bank = (loc->offset + loc->length - 1) / MMU_ROM_BANK_SIZE;
  274. if (bank != li->bank && !li->cross_blocks) {
  275. ErrorInfo *ei = error_info_create(line, ET_LAYOUT, ED_LYT_BLOCK_CROSS);
  276. if (li->origin)
  277. error_info_append(ei, li->origin);
  278. return ei;
  279. }
  280. for (size_t i = 0; i < loc->length; i++) {
  281. li->overlap_table[loc->offset + i] = line;
  282. li->overlap_origins[loc->offset + i] = li->origin;
  283. }
  284. return NULL;
  285. }
  286. /*
  287. Tokenize ASMLines into ASMInstructions and ASMData.
  288. NULL is returned on success and an ErrorInfo object is returned on failure.
  289. state->instructions, state->data, and state->symtable may or may not be
  290. modified regardless of success.
  291. */
  292. ErrorInfo* tokenize(AssemblerState *state)
  293. {
  294. ASMLayoutInfo li = {
  295. .size = state->rom_size ? state->rom_size : ROM_SIZE_MAX,
  296. .origin = NULL, .bank = 0, .cross_blocks = state->cross_blocks
  297. };
  298. li.overlap_table = calloc(li.size, sizeof(const ASMLine*));
  299. li.overlap_origins = calloc(li.size, sizeof(const ASMLine*));
  300. if (!li.overlap_table || !li.overlap_origins)
  301. OUT_OF_MEMORY()
  302. ErrorInfo *ei = NULL;
  303. ASMInstruction dummy_inst = {.next = NULL}, *inst, *prev_inst = &dummy_inst;
  304. ASMData dummy_data = {.next = NULL}, *data, *prev_data = &dummy_data;
  305. const ASMLine *line = state->lines;
  306. size_t offset = 0;
  307. ASMSlotInfo si = {.lines = {0}};
  308. for (size_t i = 0; i < HEADER_SIZE; i++)
  309. li.overlap_table[state->header.offset + i] = &header_sentinel;
  310. memset(si.slots, -1, MMU_NUM_ROM_BANKS);
  311. while (line) {
  312. if (line->is_label) {
  313. if (offset >= li.size) {
  314. ei = error_info_create(line, ET_LAYOUT, ED_LYT_BOUNDS);
  315. goto cleanup;
  316. }
  317. int8_t slot = si.slots[offset / MMU_NUM_ROM_BANKS];
  318. if ((ei = add_label_to_table(state->symtable, line, offset, slot)))
  319. goto cleanup;
  320. }
  321. else if (IS_LOCAL_DIRECTIVE(line)) {
  322. if (IS_DIRECTIVE(line, DIR_ORIGIN)) {
  323. if ((ei = handle_origin_directive(line, &offset)))
  324. goto cleanup;
  325. li.origin = line;
  326. li.bank = offset / MMU_ROM_BANK_SIZE;
  327. }
  328. else if (IS_DIRECTIVE(line, DIR_BLOCK)) {
  329. if ((ei = handle_block_directive(line, &offset, &si)))
  330. goto cleanup;
  331. li.origin = line;
  332. li.bank = offset / MMU_ROM_BANK_SIZE;
  333. }
  334. else {
  335. if ((ei = parse_data(line, &data, offset)))
  336. goto cleanup;
  337. offset += data->loc.length;
  338. prev_data->next = data;
  339. prev_data = data;
  340. if ((ei = check_layout(&li, &data->loc, line)))
  341. goto cleanup;
  342. }
  343. }
  344. else {
  345. if ((ei = parse_instruction(line, &inst, offset)))
  346. goto cleanup;
  347. offset += inst->loc.length;
  348. prev_inst->next = inst;
  349. prev_inst = inst;
  350. if ((ei = check_layout(&li, &inst->loc, line)))
  351. goto cleanup;
  352. }
  353. line = line->next;
  354. }
  355. cleanup:
  356. state->instructions = dummy_inst.next;
  357. state->data = dummy_data.next;
  358. free(li.overlap_table);
  359. free(li.overlap_origins);
  360. return ei;
  361. }