An emulator, assembler, and disassembler for the Sega Game Gear
Vous ne pouvez pas sélectionner plus de 25 sujets Les noms de sujets doivent commencer par une lettre ou un nombre, peuvent contenir des tirets ('-') et peuvent comporter jusqu'à 35 caractères.

tokenizer.c 16 KiB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540
  1. /* Copyright (C) 2014-2016 Ben Kurtovic <ben.kurtovic@gmail.com>
  2. Released under the terms of the MIT License. See LICENSE for details. */
  3. #include <stdlib.h>
  4. #include <string.h>
  5. #include "tokenizer.h"
  6. #include "directives.h"
  7. #include "instructions.h"
  8. #include "inst_args.h"
  9. #include "parse_util.h"
  10. #include "../mmu.h"
  11. #include "../rom.h"
  12. #include "../util.h"
  13. /* Internal structs */
/*
    Bookkeeping used by check_layout() to validate where objects are placed.
*/
typedef struct {
    size_t size;  // Number of entries allocated in each of the two tables
    const ASMLine **overlap_table;  // Per offset: claiming line, a sentinel, or NULL
    const ASMLine **overlap_origins;  // Per offset: value of `origin` when claimed
    const ASMLine *origin;  // Latest origin/block directive line; NULL until one is seen
    uint8_t bank;  // offset / MMU_ROM_BANK_SIZE as of the latest origin/block directive
    bool cross_blocks;  // Copied from state->cross_blocks; disables the block-cross check
} ASMLayoutInfo;
/*
    Per-bank record of the slot chosen by block directives.
*/
typedef struct {
    int8_t slots[MMU_NUM_ROM_BANKS];  // Slot assigned to each bank; negative if none yet
    const ASMLine *lines[MMU_NUM_ROM_BANKS];  // First block directive seen for each bank
} ASMSlotInfo;
/*
    Sentinel values for overlap table. Only their addresses matter:
    init_layout_info() marks the header region with &header_sentinel, and
    check_layout() compares a clash against &header_sentinel and
    &bounds_sentinel to choose the error description.
*/
const ASMLine header_sentinel, bounds_sentinel;
/*
    Typedef for parse_util data parser functions. As called from parse_data():
    (result buffer, result length, argument text, argument length) -> success.
*/
typedef bool (*parser_func)(uint8_t**, size_t*, const char*, ssize_t);
  30. /*
  31. Return the address of a given ROM offset when mapped into the given slot.
  32. */
  33. static inline uint16_t map_into_slot(size_t offset, int8_t slot)
  34. {
  35. return (slot * MMU_ROM_BANK_SIZE) + (offset & (MMU_ROM_BANK_SIZE - 1));
  36. }
  37. /*
  38. Return the default slot associated with a given memory bank.
  39. */
  40. static inline int8_t default_bank_slot(uint8_t bank)
  41. {
  42. return bank > 2 ? 2 : bank;
  43. }
  44. /*
  45. Initialize an ASMLayoutInfo object.
  46. */
  47. static void init_layout_info(ASMLayoutInfo *li, AssemblerState *state)
  48. {
  49. li->size = state->rom_size ? state->rom_size : ROM_SIZE_MAX;
  50. li->origin = NULL;
  51. li->bank = 0;
  52. li->cross_blocks = state->cross_blocks;
  53. li->overlap_table = cr_calloc(li->size, sizeof(const ASMLine*));
  54. li->overlap_origins = cr_calloc(li->size, sizeof(const ASMLine*));
  55. for (size_t i = 0; i < HEADER_SIZE; i++)
  56. li->overlap_table[state->header.offset + i] = &header_sentinel;
  57. }
  58. /*
  59. Free the resources allocated by an ASMLayoutInfo object.
  60. */
  61. static void free_layout_info(ASMLayoutInfo *li)
  62. {
  63. free(li->overlap_table);
  64. free(li->overlap_origins);
  65. }
  66. /*
  67. Add a given line, representing a label, to the symbol table.
  68. Return NULL on success and an ErrorInfo object on failure (e.g. in the case
  69. of duplicate labels, or labels sharing names with registers/conditions).
  70. */
  71. static ErrorInfo* add_label_to_table(
  72. ASMSymbolTable *symtable, const ASMLine *line, size_t offset, int8_t slot)
  73. {
  74. if (line->length - 1 >= MAX_SYMBOL_SIZE)
  75. return error_info_create(line, ET_SYMBOL, ED_SYM_TOO_LONG);
  76. ASMArgParseInfo info = {.arg = line->data, .size = line->length - 1};
  77. ASMArgRegister reg;
  78. if (argparse_register(&reg, info))
  79. return error_info_create(line, ET_SYMBOL, ED_SYM_IS_REGISTER);
  80. ASMArgCondition cond;
  81. if (argparse_condition(&cond, info))
  82. return error_info_create(line, ET_SYMBOL, ED_SYM_IS_CONDITION);
  83. char *symbol = cr_strndup(line->data, line->length - 1);
  84. const ASMSymbol *current = asm_symtable_find(symtable, symbol);
  85. if (current) {
  86. ErrorInfo *ei = error_info_create(line, ET_SYMBOL, ED_SYM_DUPE_LABELS);
  87. error_info_append(ei, current->line);
  88. free(symbol);
  89. return ei;
  90. }
  91. ASMSymbol *label = cr_malloc(sizeof(ASMSymbol));
  92. label->offset = map_into_slot(offset,
  93. (slot >= 0) ? slot : default_bank_slot(offset / MMU_ROM_BANK_SIZE));
  94. label->symbol = symbol;
  95. label->line = line;
  96. asm_symtable_insert(symtable, label);
  97. return NULL;
  98. }
  99. /*
  100. Handle a define directive by adding an entry to the define table.
  101. Return NULL on success and an ErrorInfo object on failure.
  102. */
  103. static ErrorInfo* handle_define_directive(
  104. const ASMLine *line, ASMDefineTable *deftab)
  105. {
  106. if (!DIRECTIVE_HAS_ARG(line, DIR_DEFINE))
  107. return error_info_create(line, ET_PREPROC, ED_PP_NO_ARG);
  108. size_t start = DIRECTIVE_OFFSET(line, DIR_DEFINE) + 1, i;
  109. for (i = start; i < line->length; i++) {
  110. if (!is_valid_symbol_char(line->data[i], i == start)) {
  111. if (line->data[i] == ' ' && i > start) {
  112. i++;
  113. break;
  114. }
  115. return error_info_create(line, ET_PREPROC, ED_PP_BAD_ARG);
  116. }
  117. }
  118. if (i >= line->length) // Missing value for define
  119. return error_info_create(line, ET_PREPROC, ED_PP_NO_ARG);
  120. const char *key = line->data + start;
  121. size_t keylen = i - start - 1;
  122. const ASMDefine *current = asm_deftable_find(deftab, key, keylen);
  123. if (current) {
  124. ErrorInfo *ei = error_info_create(line, ET_PREPROC, ED_PP_DUPLICATE);
  125. error_info_append(ei, current->line);
  126. return ei;
  127. }
  128. ASMArgImmediate imm;
  129. ASMArgParseInfo info = {
  130. .arg = line->data + i, .size = line->length - i, .deftable = deftab};
  131. if (!argparse_immediate(&imm, info) || imm.is_label)
  132. return error_info_create(line, ET_PREPROC, ED_PP_BAD_ARG);
  133. ASMDefine *define = cr_malloc(sizeof(ASMDefine));
  134. define->name = cr_strndup(key, keylen);
  135. define->value = imm;
  136. define->line = line;
  137. asm_deftable_insert(deftab, define);
  138. return NULL;
  139. }
  140. /*
  141. Handle an undefine directive by remove an entry in the define table.
  142. Return NULL on success and an ErrorInfo object on failure.
  143. */
  144. static ErrorInfo* handle_undef_directive(
  145. const ASMLine *line, ASMDefineTable *deftab)
  146. {
  147. if (!DIRECTIVE_HAS_ARG(line, DIR_UNDEF))
  148. return error_info_create(line, ET_PREPROC, ED_PP_NO_ARG);
  149. size_t offset = DIRECTIVE_OFFSET(line, DIR_UNDEF) + 1;
  150. const char *arg = line->data + offset;
  151. size_t size = line->length - offset, i;
  152. for (i = 0; i < size; i++) {
  153. if (!is_valid_symbol_char(arg[i], i == 0))
  154. return error_info_create(line, ET_PREPROC, ED_PP_BAD_ARG);
  155. }
  156. asm_deftable_remove(deftab, arg, size);
  157. return NULL;
  158. }
  159. /*
  160. Handle an origin directive by updating the offset.
  161. Return NULL on success and an ErrorInfo object on failure.
  162. */
  163. static ErrorInfo* handle_origin_directive(const ASMLine *line, size_t *offset)
  164. {
  165. if (!DIRECTIVE_HAS_ARG(line, DIR_ORIGIN))
  166. return error_info_create(line, ET_PREPROC, ED_PP_NO_ARG);
  167. uint32_t arg;
  168. if (!dparse_uint32_t(&arg, line, DIR_ORIGIN))
  169. return error_info_create(line, ET_PREPROC, ED_PP_BAD_ARG);
  170. if (arg >= ROM_SIZE_MAX)
  171. return error_info_create(line, ET_PREPROC, ED_PP_ARG_RANGE);
  172. *offset = arg;
  173. return NULL;
  174. }
  175. /*
  176. Handle a block directive by updating the offset and slot.
  177. Return NULL on success and an ErrorInfo object on failure.
  178. */
  179. static ErrorInfo* handle_block_directive(
  180. const ASMLine *line, size_t *offset, ASMSlotInfo *si)
  181. {
  182. if (!DIRECTIVE_HAS_ARG(line, DIR_BLOCK))
  183. return error_info_create(line, ET_PREPROC, ED_PP_NO_ARG);
  184. uint8_t *args, bank, slot;
  185. size_t dir_offset = DIRECTIVE_OFFSET(line, DIR_BLOCK) + 1, nargs;
  186. if (!parse_bytes(&args, &nargs, line->data + dir_offset,
  187. line->length - dir_offset))
  188. return error_info_create(line, ET_PREPROC, ED_PP_BAD_ARG);
  189. if (nargs < 1 || nargs > 2)
  190. return free(args), error_info_create(line, ET_PREPROC, ED_PP_BAD_ARG);
  191. bank = args[0];
  192. slot = nargs == 2 ? args[1] : default_bank_slot(bank);
  193. free(args);
  194. if (bank >= MMU_NUM_ROM_BANKS || slot >= MMU_NUM_SLOTS)
  195. return error_info_create(line, ET_PREPROC, ED_PP_ARG_RANGE);
  196. if (bank == 0 && slot != 0)
  197. return error_info_create(line, ET_LAYOUT, ED_LYT_BLOCK0);
  198. if (si->slots[bank] >= 0 && si->slots[bank] != slot) {
  199. ErrorInfo *ei = error_info_create(line, ET_LAYOUT, ED_LYT_SLOTS);
  200. error_info_append(ei, si->lines[bank]);
  201. return ei;
  202. }
  203. *offset = bank * MMU_ROM_BANK_SIZE;
  204. si->slots[bank] = slot;
  205. if (!si->lines[bank])
  206. si->lines[bank] = line;
  207. return NULL;
  208. }
  209. /*
  210. Parse a .space directive, which fills a region with a single byte.
  211. */
  212. static bool parse_space(
  213. uint8_t **result, size_t *length, const char *arg, ssize_t size)
  214. {
  215. uint8_t *bytes;
  216. size_t nbytes;
  217. if (!parse_bytes(&bytes, &nbytes, arg, size))
  218. return false;
  219. if (nbytes < 1 || nbytes > 2) {
  220. free(bytes);
  221. return false;
  222. }
  223. *length = bytes[0];
  224. *result = cr_malloc(sizeof(uint8_t) * (*length));
  225. memset(*result, nbytes == 2 ? bytes[1] : 0, *length);
  226. free(bytes);
  227. return true;
  228. }
  229. /*
  230. Parse a string like parse_string(), but null-terminate it.
  231. */
  232. static bool parse_cstring(
  233. char **result, size_t *length, const char *arg, ssize_t size)
  234. {
  235. if (!parse_string(result, length, arg, size))
  236. return false;
  237. (*length)++;
  238. *result = cr_realloc(*result, sizeof(char) * (*length));
  239. (*result)[*length - 1] = '\0';
  240. return true;
  241. }
  242. /*
  243. Parse data encoded in a line into an ASMData object.
  244. On success, return NULL and store the instruction in *data_ptr. On failure,
  245. return an ErrorInfo object; *data_ptr is not modified.
  246. */
  247. static ErrorInfo* parse_data(
  248. const ASMLine *line, ASMData **data_ptr, size_t offset)
  249. {
  250. const char *directive;
  251. parser_func parser;
  252. if (IS_DIRECTIVE(line, DIR_BYTE)) {
  253. directive = DIR_BYTE;
  254. parser = parse_bytes;
  255. } else if (IS_DIRECTIVE(line, DIR_SPACE)) {
  256. directive = DIR_SPACE;
  257. parser = parse_space;
  258. } else if (IS_DIRECTIVE(line, DIR_ASCII)) {
  259. directive = DIR_ASCII;
  260. parser = (parser_func) parse_string;
  261. } else if (IS_DIRECTIVE(line, DIR_ASCIZ)) {
  262. directive = DIR_ASCIZ;
  263. parser = (parser_func) parse_cstring;
  264. } else if (IS_DIRECTIVE(line, DIR_ASCIIZ)) {
  265. directive = DIR_ASCIIZ;
  266. parser = (parser_func) parse_cstring;
  267. } else {
  268. return error_info_create(line, ET_PREPROC, ED_PP_UNKNOWN);
  269. }
  270. if (!DIRECTIVE_HAS_ARG(line, directive))
  271. return error_info_create(line, ET_PREPROC, ED_PP_NO_ARG);
  272. size_t dir_offset = DIRECTIVE_OFFSET(line, directive) + 1;
  273. const char *arg = line->data + dir_offset;
  274. size_t arglen = line->length - dir_offset;
  275. ASMData *data = cr_malloc(sizeof(ASMData));
  276. data->loc.offset = offset;
  277. data->next = NULL;
  278. if (!parser(&data->bytes, &data->loc.length, arg, arglen)) {
  279. free(data);
  280. return error_info_create(line, ET_PREPROC, ED_PP_BAD_ARG);
  281. }
  282. *data_ptr = data;
  283. return NULL;
  284. }
  285. /*
  286. Parse an instruction encoded in a line into an ASMInstruction object.
  287. On success, return NULL and store the instruction in *inst_ptr. On failure,
  288. return an ErrorInfo object; *inst_ptr is not modified.
  289. */
  290. static ErrorInfo* parse_instruction(
  291. const ASMLine *line, ASMInstruction **inst_ptr, size_t offset,
  292. ASMDefineTable *deftab)
  293. {
  294. char mnemonic[MAX_MNEMONIC_SIZE] = {0};
  295. size_t i = 0;
  296. while (i < line->length) {
  297. char c = line->data[i];
  298. if (c == ' ')
  299. break;
  300. if (i >= MAX_MNEMONIC_SIZE)
  301. return error_info_create(line, ET_PARSER, ED_PS_OP_TOO_LONG);
  302. if ((c < 'a' || c > 'z') && (c < '0' || c > '9'))
  303. return error_info_create(line, ET_PARSER, ED_PS_OP_INVALID);
  304. mnemonic[i++] = c;
  305. }
  306. if (i < MIN_MNEMONIC_SIZE)
  307. return error_info_create(line, ET_PARSER, ED_PS_OP_TOO_SHORT);
  308. if (i + 1 < line->length)
  309. i++; // Advance past space
  310. uint8_t *bytes;
  311. size_t arglen = line->length - i, length;
  312. char *argstart = arglen > 0 ? line->data + i : NULL, *symbol = NULL;
  313. ASMInstParser parser = get_inst_parser(mnemonic);
  314. if (!parser)
  315. return error_info_create(line, ET_PARSER, ED_PS_OP_UNKNOWN);
  316. ASMArgParseInfo ai = {.arg = argstart, .size = arglen, .deftable = deftab};
  317. ASMErrorDesc edesc = parser(&bytes, &length, &symbol, ai);
  318. if (edesc != ED_NONE)
  319. return error_info_create(line, ET_PARSER, edesc);
  320. ASMInstruction *inst = cr_malloc(sizeof(ASMInstruction));
  321. inst->loc.offset = offset;
  322. inst->loc.length = length;
  323. inst->bytes = bytes;
  324. inst->symbol = symbol;
  325. inst->line = line;
  326. inst->next = NULL;
  327. *inst_ptr = inst;
  328. return NULL;
  329. }
  330. /*
  331. Check if the given object location is legal.
  332. Checks include ROM size bounding, overlapping with existing objects, and
  333. block-crossing assuming the .cross_blocks directive has not been specified.
  334. On success, return NULL and add the location to the overlap table.
  335. On failure, return an ErrorInfo object.
  336. */
  337. static ErrorInfo* check_layout(
  338. ASMLayoutInfo *li, const ASMLocation *loc, const ASMLine *line)
  339. {
  340. const ASMLine *clash = NULL, *clash_origin;
  341. if (loc->offset + loc->length > li->size) {
  342. clash = &bounds_sentinel;
  343. } else {
  344. for (size_t i = 0; i < loc->length; i++) {
  345. if (li->overlap_table[loc->offset + i]) {
  346. clash = li->overlap_table[loc->offset + i];
  347. clash_origin = li->overlap_origins[loc->offset + i];
  348. break;
  349. }
  350. }
  351. }
  352. if (clash) {
  353. ErrorInfo *ei = error_info_create(line, ET_LAYOUT,
  354. (clash == &header_sentinel) ? ED_LYT_OVERLAP_HEAD :
  355. (clash == &bounds_sentinel) ? ED_LYT_BOUNDS : ED_LYT_OVERLAP);
  356. if (li->origin)
  357. error_info_append(ei, li->origin);
  358. if (clash != &header_sentinel && clash != &bounds_sentinel) {
  359. error_info_append(ei, clash);
  360. if (clash_origin)
  361. error_info_append(ei, clash_origin);
  362. }
  363. return ei;
  364. }
  365. uint8_t bank = (loc->offset + loc->length - 1) / MMU_ROM_BANK_SIZE;
  366. if (bank != li->bank && !li->cross_blocks) {
  367. ErrorInfo *ei = error_info_create(line, ET_LAYOUT, ED_LYT_BLOCK_CROSS);
  368. if (li->origin)
  369. error_info_append(ei, li->origin);
  370. return ei;
  371. }
  372. for (size_t i = 0; i < loc->length; i++) {
  373. li->overlap_table[loc->offset + i] = line;
  374. li->overlap_origins[loc->offset + i] = li->origin;
  375. }
  376. return NULL;
  377. }
  378. /*
  379. Tokenize ASMLines into ASMInstructions and ASMData.
  380. NULL is returned on success and an ErrorInfo object is returned on failure.
  381. state->instructions, state->data, and state->symtable may or may not be
  382. modified regardless of success.
  383. */
  384. ErrorInfo* tokenize(AssemblerState *state)
  385. {
  386. ErrorInfo *ei = NULL;
  387. ASMLayoutInfo li;
  388. ASMSlotInfo si = {.lines = {0}};
  389. ASMDefineTable *deftab = asm_deftable_new();
  390. ASMInstruction dummy_inst = {.next = NULL}, *inst, *prev_inst = &dummy_inst;
  391. ASMData dummy_data = {.next = NULL}, *data, *prev_data = &dummy_data;
  392. const ASMLine *line = state->lines;
  393. size_t offset = 0;
  394. DEBUG("Running tokenizer")
  395. init_layout_info(&li, state);
  396. memset(si.slots, -1, MMU_NUM_ROM_BANKS);
  397. while (line) {
  398. if (line->is_label) {
  399. if (offset >= li.size) {
  400. ei = error_info_create(line, ET_LAYOUT, ED_LYT_BOUNDS);
  401. goto cleanup;
  402. }
  403. int8_t slot = si.slots[offset / MMU_NUM_ROM_BANKS];
  404. if ((ei = add_label_to_table(state->symtable, line, offset, slot)))
  405. goto cleanup;
  406. }
  407. else if (IS_LOCAL_DIRECTIVE(line)) {
  408. if (IS_DIRECTIVE(line, DIR_DEFINE)) {
  409. if ((ei = handle_define_directive(line, deftab)))
  410. goto cleanup;
  411. }
  412. else if (IS_DIRECTIVE(line, DIR_UNDEF)) {
  413. if ((ei = handle_undef_directive(line, deftab)))
  414. goto cleanup;
  415. }
  416. else if (IS_DIRECTIVE(line, DIR_ORIGIN)) {
  417. if ((ei = handle_origin_directive(line, &offset)))
  418. goto cleanup;
  419. li.origin = line;
  420. li.bank = offset / MMU_ROM_BANK_SIZE;
  421. }
  422. else if (IS_DIRECTIVE(line, DIR_BLOCK)) {
  423. if ((ei = handle_block_directive(line, &offset, &si)))
  424. goto cleanup;
  425. li.origin = line;
  426. li.bank = offset / MMU_ROM_BANK_SIZE;
  427. }
  428. else {
  429. if ((ei = parse_data(line, &data, offset)))
  430. goto cleanup;
  431. offset += data->loc.length;
  432. prev_data->next = data;
  433. prev_data = data;
  434. if ((ei = check_layout(&li, &data->loc, line)))
  435. goto cleanup;
  436. }
  437. }
  438. else {
  439. if ((ei = parse_instruction(line, &inst, offset, deftab)))
  440. goto cleanup;
  441. offset += inst->loc.length;
  442. prev_inst->next = inst;
  443. prev_inst = inst;
  444. if ((ei = check_layout(&li, &inst->loc, line)))
  445. goto cleanup;
  446. }
  447. line = line->next;
  448. }
  449. cleanup:
  450. state->instructions = dummy_inst.next;
  451. state->data = dummy_data.next;
  452. free_layout_info(&li);
  453. asm_deftable_free(deftab);
  454. return ei;
  455. }