An emulator, assembler, and disassembler for the Sega Game Gear
Nie możesz wybrać więcej, niż 25 tematów Tematy muszą się zaczynać od litery lub cyfry, mogą zawierać myślniki ('-') i mogą mieć do 35 znaków.

tokenizer.c 16 KiB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540
  1. /* Copyright (C) 2014-2016 Ben Kurtovic <ben.kurtovic@gmail.com>
  2. Released under the terms of the MIT License. See LICENSE for details. */
  3. #include <stdlib.h>
  4. #include <string.h>
  5. #include "tokenizer.h"
  6. #include "directives.h"
  7. #include "instructions.h"
  8. #include "inst_args.h"
  9. #include "parse_util.h"
  10. #include "../mmu.h"
  11. #include "../rom.h"
  12. #include "../util.h"
  13. /* Internal structs */
  14. typedef struct {
  15. size_t size;
  16. const ASMLine **overlap_table;
  17. const ASMLine **overlap_origins;
  18. const ASMLine *origin;
  19. uint8_t bank;
  20. bool cross_blocks;
  21. } ASMLayoutInfo;
  22. typedef struct {
  23. int8_t slots[MMU_NUM_ROM_BANKS];
  24. const ASMLine *lines[MMU_NUM_ROM_BANKS];
  25. } ASMSlotInfo;
  26. /* Sentinel values for overlap table */
  27. const ASMLine header_sentinel, bounds_sentinel;
  /* Common signature of the data parsers selected by parse_data(): on
     success they return true and store a buffer and its length through the
     first two arguments. */
  typedef bool (*parser_func)(
      uint8_t **result, size_t *length, const char *arg, ssize_t size);
  30. /*
  31. Return the address of a given ROM offset when mapped into the given slot.
  32. */
  33. static inline uint16_t map_into_slot(size_t offset, int8_t slot)
  34. {
  35. return (slot * MMU_ROM_BANK_SIZE) + (offset & (MMU_ROM_BANK_SIZE - 1));
  36. }
  /*
      Pick the slot a bank uses when none is given explicitly.

      Banks 0, 1 and 2 map to the slot with the same number; every higher
      bank maps to slot 2.
  */
  static inline int8_t default_bank_slot(uint8_t bank)
  {
      if (bank <= 2)
          return bank;
      return 2;
  }
  44. /*
  45. Initialize an ASMLayoutInfo object.
  46. */
  47. static void init_layout_info(ASMLayoutInfo *li, AssemblerState *state)
  48. {
  49. li->size = state->rom_size ? state->rom_size : ROM_SIZE_MAX;
  50. li->origin = NULL;
  51. li->bank = 0;
  52. li->cross_blocks = state->cross_blocks;
  53. li->overlap_table = cr_calloc(li->size, sizeof(const ASMLine*));
  54. li->overlap_origins = cr_calloc(li->size, sizeof(const ASMLine*));
  55. for (size_t i = 0; i < HEADER_SIZE; i++)
  56. li->overlap_table[state->header.offset + i] = &header_sentinel;
  57. }
  58. /*
  59. Free the resources allocated by an ASMLayoutInfo object.
  60. */
  61. static void free_layout_info(ASMLayoutInfo *li)
  62. {
  63. free(li->overlap_table);
  64. free(li->overlap_origins);
  65. }
  66. /*
  67. Add a given line, representing a label, to the symbol table.
  68. Return NULL on success and an ErrorInfo object on failure (e.g. in the case
  69. of duplicate labels, or labels sharing names with registers/conditions).
  70. */
  71. static ErrorInfo* add_label_to_table(
  72. ASMSymbolTable *symtable, const ASMLine *line, size_t offset, int8_t slot)
  73. {
  74. if (line->length - 1 >= MAX_SYMBOL_SIZE)
  75. return error_info_create(line, ET_SYMBOL, ED_SYM_TOO_LONG);
  76. ASMArgParseInfo info = {.arg = line->data, .size = line->length - 1};
  77. ASMArgRegister reg;
  78. if (argparse_register(&reg, info))
  79. return error_info_create(line, ET_SYMBOL, ED_SYM_IS_REGISTER);
  80. ASMArgCondition cond;
  81. if (argparse_condition(&cond, info))
  82. return error_info_create(line, ET_SYMBOL, ED_SYM_IS_CONDITION);
  83. char *symbol = cr_strndup(line->data, line->length - 1);
  84. const ASMSymbol *current = asm_symtable_find(symtable, symbol);
  85. if (current) {
  86. ErrorInfo *ei = error_info_create(line, ET_SYMBOL, ED_SYM_DUPE_LABELS);
  87. error_info_append(ei, current->line);
  88. free(symbol);
  89. return ei;
  90. }
  91. ASMSymbol *label = cr_malloc(sizeof(ASMSymbol));
  92. label->offset = map_into_slot(offset,
  93. (slot >= 0) ? slot : default_bank_slot(offset / MMU_ROM_BANK_SIZE));
  94. label->symbol = symbol;
  95. label->line = line;
  96. asm_symtable_insert(symtable, label);
  97. return NULL;
  98. }
  99. /*
  100. Handle a define directive by adding an entry to the define table.
  101. Return NULL on success and an ErrorInfo object on failure.
  102. */
  103. static ErrorInfo* handle_define_directive(
  104. const ASMLine *line, ASMDefineTable *deftab)
  105. {
  106. if (!DIRECTIVE_HAS_ARG(line, DIR_DEFINE))
  107. return error_info_create(line, ET_PREPROC, ED_PP_NO_ARG);
  108. size_t start = DIRECTIVE_OFFSET(line, DIR_DEFINE) + 1, i;
  109. for (i = start; i < line->length; i++) {
  110. if (!is_valid_symbol_char(line->data[i], i == start)) {
  111. if (line->data[i] == ' ' && i > start) {
  112. i++;
  113. break;
  114. }
  115. return error_info_create(line, ET_PREPROC, ED_PP_BAD_ARG);
  116. }
  117. }
  118. if (i >= line->length) // Missing value for define
  119. return error_info_create(line, ET_PREPROC, ED_PP_NO_ARG);
  120. const char *key = line->data + start;
  121. size_t keylen = i - start - 1;
  122. const ASMDefine *current = asm_deftable_find(deftab, key, keylen);
  123. if (current) {
  124. ErrorInfo *ei = error_info_create(line, ET_PREPROC, ED_PP_DUPLICATE);
  125. error_info_append(ei, current->line);
  126. return ei;
  127. }
  128. ASMArgImmediate imm;
  129. ASMArgParseInfo info = {
  130. .arg = line->data + i, .size = line->length - i, .deftable = deftab};
  131. if (!argparse_immediate(&imm, info) || imm.is_label)
  132. return error_info_create(line, ET_PREPROC, ED_PP_BAD_ARG);
  133. ASMDefine *define = cr_malloc(sizeof(ASMDefine));
  134. define->name = cr_strndup(key, keylen);
  135. define->value = imm;
  136. define->line = line;
  137. asm_deftable_insert(deftab, define);
  138. return NULL;
  139. }
  140. /*
  141. Handle an undefine directive by remove an entry in the define table.
  142. Return NULL on success and an ErrorInfo object on failure.
  143. */
  144. static ErrorInfo* handle_undef_directive(
  145. const ASMLine *line, ASMDefineTable *deftab)
  146. {
  147. if (!DIRECTIVE_HAS_ARG(line, DIR_UNDEF))
  148. return error_info_create(line, ET_PREPROC, ED_PP_NO_ARG);
  149. size_t offset = DIRECTIVE_OFFSET(line, DIR_UNDEF) + 1;
  150. const char *arg = line->data + offset;
  151. size_t size = line->length - offset, i;
  152. for (i = 0; i < size; i++) {
  153. if (!is_valid_symbol_char(arg[i], i == 0))
  154. return error_info_create(line, ET_PREPROC, ED_PP_BAD_ARG);
  155. }
  156. asm_deftable_remove(deftab, arg, size);
  157. return NULL;
  158. }
  159. /*
  160. Handle an origin directive by updating the offset.
  161. Return NULL on success and an ErrorInfo object on failure.
  162. */
  163. static ErrorInfo* handle_origin_directive(const ASMLine *line, size_t *offset)
  164. {
  165. if (!DIRECTIVE_HAS_ARG(line, DIR_ORIGIN))
  166. return error_info_create(line, ET_PREPROC, ED_PP_NO_ARG);
  167. uint32_t arg;
  168. if (!dparse_uint32_t(&arg, line, DIR_ORIGIN))
  169. return error_info_create(line, ET_PREPROC, ED_PP_BAD_ARG);
  170. if (arg >= ROM_SIZE_MAX)
  171. return error_info_create(line, ET_PREPROC, ED_PP_ARG_RANGE);
  172. *offset = arg;
  173. return NULL;
  174. }
  175. /*
  176. Handle a block directive by updating the offset and slot.
  177. Return NULL on success and an ErrorInfo object on failure.
  178. */
  179. static ErrorInfo* handle_block_directive(
  180. const ASMLine *line, size_t *offset, ASMSlotInfo *si)
  181. {
  182. if (!DIRECTIVE_HAS_ARG(line, DIR_BLOCK))
  183. return error_info_create(line, ET_PREPROC, ED_PP_NO_ARG);
  184. uint8_t *args, bank, slot;
  185. size_t dir_offset = DIRECTIVE_OFFSET(line, DIR_BLOCK) + 1, nargs;
  186. if (!parse_bytes(&args, &nargs, line->data + dir_offset,
  187. line->length - dir_offset))
  188. return error_info_create(line, ET_PREPROC, ED_PP_BAD_ARG);
  189. if (nargs < 1 || nargs > 2)
  190. return free(args), error_info_create(line, ET_PREPROC, ED_PP_BAD_ARG);
  191. bank = args[0];
  192. slot = nargs == 2 ? args[1] : default_bank_slot(bank);
  193. free(args);
  194. if (bank >= MMU_NUM_ROM_BANKS || slot >= MMU_NUM_SLOTS)
  195. return error_info_create(line, ET_PREPROC, ED_PP_ARG_RANGE);
  196. if (bank == 0 && slot != 0)
  197. return error_info_create(line, ET_LAYOUT, ED_LYT_BLOCK0);
  198. if (si->slots[bank] >= 0 && si->slots[bank] != slot) {
  199. ErrorInfo *ei = error_info_create(line, ET_LAYOUT, ED_LYT_SLOTS);
  200. error_info_append(ei, si->lines[bank]);
  201. return ei;
  202. }
  203. *offset = bank * MMU_ROM_BANK_SIZE;
  204. si->slots[bank] = slot;
  205. if (!si->lines[bank])
  206. si->lines[bank] = line;
  207. return NULL;
  208. }
  /*
      Parse the argument of a .space directive, which fills a region with a
      single byte.

      The argument is parsed with parse_bytes() and must yield one or two
      bytes: the region length, optionally followed by the fill value (0 if
      omitted). On success, return true and store a newly allocated buffer
      in *result and its length in *length. On failure, return false.
  */
  static bool parse_space(
      uint8_t **result, size_t *length, const char *arg, ssize_t size)
  {
      uint8_t *args;
      size_t nargs;

      if (!parse_bytes(&args, &nargs, arg, size))
          return false;

      if (nargs != 1 && nargs != 2) {
          free(args);
          return false;
      }

      const size_t count = args[0];
      const uint8_t fill = (nargs == 2) ? args[1] : 0;
      free(args);

      uint8_t *region = cr_malloc(sizeof(uint8_t) * count);
      memset(region, fill, count);

      *length = count;
      *result = region;
      return true;
  }
  /*
      Parse a string exactly as parse_string() does, then append a
      terminating null byte.

      On success, *length includes the added terminator. On failure, return
      false.
  */
  static bool parse_cstring(
      char **result, size_t *length, const char *arg, ssize_t size)
  {
      if (!parse_string(result, length, arg, size))
          return false;

      const size_t total = *length + 1;
      char *buffer = cr_realloc(*result, sizeof(char) * total);

      buffer[total - 1] = '\0';
      *result = buffer;
      *length = total;
      return true;
  }
  242. /*
  243. Parse data encoded in a line into an ASMData object.
  244. On success, return NULL and store the instruction in *data_ptr. On failure,
  245. return an ErrorInfo object; *data_ptr is not modified.
  246. */
  247. static ErrorInfo* parse_data(
  248. const ASMLine *line, ASMData **data_ptr, size_t offset)
  249. {
  250. const char *directive;
  251. parser_func parser;
  252. if (IS_DIRECTIVE(line, DIR_BYTE)) {
  253. directive = DIR_BYTE;
  254. parser = parse_bytes;
  255. } else if (IS_DIRECTIVE(line, DIR_SPACE)) {
  256. directive = DIR_SPACE;
  257. parser = parse_space;
  258. } else if (IS_DIRECTIVE(line, DIR_ASCII)) {
  259. directive = DIR_ASCII;
  260. parser = (parser_func) parse_string;
  261. } else if (IS_DIRECTIVE(line, DIR_ASCIZ)) {
  262. directive = DIR_ASCIZ;
  263. parser = (parser_func) parse_cstring;
  264. } else if (IS_DIRECTIVE(line, DIR_ASCIIZ)) {
  265. directive = DIR_ASCIIZ;
  266. parser = (parser_func) parse_cstring;
  267. } else {
  268. return error_info_create(line, ET_PREPROC, ED_PP_UNKNOWN);
  269. }
  270. if (!DIRECTIVE_HAS_ARG(line, directive))
  271. return error_info_create(line, ET_PREPROC, ED_PP_NO_ARG);
  272. size_t dir_offset = DIRECTIVE_OFFSET(line, directive) + 1;
  273. const char *arg = line->data + dir_offset;
  274. size_t arglen = line->length - dir_offset;
  275. ASMData *data = cr_malloc(sizeof(ASMData));
  276. data->loc.offset = offset;
  277. data->next = NULL;
  278. if (!parser(&data->bytes, &data->loc.length, arg, arglen)) {
  279. free(data);
  280. return error_info_create(line, ET_PREPROC, ED_PP_BAD_ARG);
  281. }
  282. *data_ptr = data;
  283. return NULL;
  284. }
  285. /*
  286. Parse an instruction encoded in a line into an ASMInstruction object.
  287. On success, return NULL and store the instruction in *inst_ptr. On failure,
  288. return an ErrorInfo object; *inst_ptr is not modified.
  289. */
  290. static ErrorInfo* parse_instruction(
  291. const ASMLine *line, ASMInstruction **inst_ptr, size_t offset,
  292. ASMDefineTable *deftab)
  293. {
  294. char mnemonic[MAX_MNEMONIC_SIZE] = {0};
  295. size_t i = 0;
  296. while (i < line->length) {
  297. char c = line->data[i];
  298. if (c == ' ')
  299. break;
  300. if (i >= MAX_MNEMONIC_SIZE)
  301. return error_info_create(line, ET_PARSER, ED_PS_OP_TOO_LONG);
  302. if ((c < 'a' || c > 'z') && (c < '0' || c > '9'))
  303. return error_info_create(line, ET_PARSER, ED_PS_OP_INVALID);
  304. mnemonic[i++] = c;
  305. }
  306. if (i < MIN_MNEMONIC_SIZE)
  307. return error_info_create(line, ET_PARSER, ED_PS_OP_TOO_SHORT);
  308. if (i + 1 < line->length)
  309. i++; // Advance past space
  310. uint8_t *bytes;
  311. size_t arglen = line->length - i, length;
  312. char *argstart = arglen > 0 ? line->data + i : NULL, *symbol = NULL;
  313. ASMInstParser parser = get_inst_parser(mnemonic);
  314. if (!parser)
  315. return error_info_create(line, ET_PARSER, ED_PS_OP_UNKNOWN);
  316. ASMArgParseInfo ai = {.arg = argstart, .size = arglen, .deftable = deftab};
  317. ASMErrorDesc edesc = parser(&bytes, &length, &symbol, ai);
  318. if (edesc != ED_NONE)
  319. return error_info_create(line, ET_PARSER, edesc);
  320. ASMInstruction *inst = cr_malloc(sizeof(ASMInstruction));
  321. inst->loc.offset = offset;
  322. inst->loc.length = length;
  323. inst->bytes = bytes;
  324. inst->symbol = symbol;
  325. inst->line = line;
  326. inst->next = NULL;
  327. *inst_ptr = inst;
  328. return NULL;
  329. }
  330. /*
  331. Check if the given object location is legal.
  332. Checks include ROM size bounding, overlapping with existing objects, and
  333. block-crossing assuming the .cross_blocks directive has not been specified.
  334. On success, return NULL and add the location to the overlap table.
  335. On failure, return an ErrorInfo object.
  336. */
  337. static ErrorInfo* check_layout(
  338. ASMLayoutInfo *li, const ASMLocation *loc, const ASMLine *line)
  339. {
  340. const ASMLine *clash = NULL, *clash_origin;
  341. if (loc->offset + loc->length > li->size) {
  342. clash = &bounds_sentinel;
  343. } else {
  344. for (size_t i = 0; i < loc->length; i++) {
  345. if (li->overlap_table[loc->offset + i]) {
  346. clash = li->overlap_table[loc->offset + i];
  347. clash_origin = li->overlap_origins[loc->offset + i];
  348. break;
  349. }
  350. }
  351. }
  352. if (clash) {
  353. ErrorInfo *ei = error_info_create(line, ET_LAYOUT,
  354. (clash == &header_sentinel) ? ED_LYT_OVERLAP_HEAD :
  355. (clash == &bounds_sentinel) ? ED_LYT_BOUNDS : ED_LYT_OVERLAP);
  356. if (li->origin)
  357. error_info_append(ei, li->origin);
  358. if (clash != &header_sentinel && clash != &bounds_sentinel) {
  359. error_info_append(ei, clash);
  360. if (clash_origin)
  361. error_info_append(ei, clash_origin);
  362. }
  363. return ei;
  364. }
  365. uint8_t bank = (loc->offset + loc->length - 1) / MMU_ROM_BANK_SIZE;
  366. if (bank != li->bank && !li->cross_blocks) {
  367. ErrorInfo *ei = error_info_create(line, ET_LAYOUT, ED_LYT_BLOCK_CROSS);
  368. if (li->origin)
  369. error_info_append(ei, li->origin);
  370. return ei;
  371. }
  372. for (size_t i = 0; i < loc->length; i++) {
  373. li->overlap_table[loc->offset + i] = line;
  374. li->overlap_origins[loc->offset + i] = li->origin;
  375. }
  376. return NULL;
  377. }
  378. /*
  379. Tokenize ASMLines into ASMInstructions and ASMData.
  380. NULL is returned on success and an ErrorInfo object is returned on failure.
  381. state->instructions, state->data, and state->symtable may or may not be
  382. modified regardless of success.
  383. */
  384. ErrorInfo* tokenize(AssemblerState *state)
  385. {
  386. ErrorInfo *ei = NULL;
  387. ASMLayoutInfo li;
  388. ASMSlotInfo si = {.lines = {0}};
  389. ASMDefineTable *deftab = asm_deftable_new();
  390. ASMInstruction dummy_inst = {.next = NULL}, *inst, *prev_inst = &dummy_inst;
  391. ASMData dummy_data = {.next = NULL}, *data, *prev_data = &dummy_data;
  392. const ASMLine *line = state->lines;
  393. size_t offset = 0;
  394. DEBUG("Running tokenizer")
  395. init_layout_info(&li, state);
  396. memset(si.slots, -1, MMU_NUM_ROM_BANKS);
  397. while (line) {
  398. if (line->is_label) {
  399. if (offset >= li.size) {
  400. ei = error_info_create(line, ET_LAYOUT, ED_LYT_BOUNDS);
  401. goto cleanup;
  402. }
  403. int8_t slot = si.slots[offset / MMU_NUM_ROM_BANKS];
  404. if ((ei = add_label_to_table(state->symtable, line, offset, slot)))
  405. goto cleanup;
  406. }
  407. else if (IS_LOCAL_DIRECTIVE(line)) {
  408. if (IS_DIRECTIVE(line, DIR_DEFINE)) {
  409. if ((ei = handle_define_directive(line, deftab)))
  410. goto cleanup;
  411. }
  412. else if (IS_DIRECTIVE(line, DIR_UNDEF)) {
  413. if ((ei = handle_undef_directive(line, deftab)))
  414. goto cleanup;
  415. }
  416. else if (IS_DIRECTIVE(line, DIR_ORIGIN)) {
  417. if ((ei = handle_origin_directive(line, &offset)))
  418. goto cleanup;
  419. li.origin = line;
  420. li.bank = offset / MMU_ROM_BANK_SIZE;
  421. }
  422. else if (IS_DIRECTIVE(line, DIR_BLOCK)) {
  423. if ((ei = handle_block_directive(line, &offset, &si)))
  424. goto cleanup;
  425. li.origin = line;
  426. li.bank = offset / MMU_ROM_BANK_SIZE;
  427. }
  428. else {
  429. if ((ei = parse_data(line, &data, offset)))
  430. goto cleanup;
  431. offset += data->loc.length;
  432. prev_data->next = data;
  433. prev_data = data;
  434. if ((ei = check_layout(&li, &data->loc, line)))
  435. goto cleanup;
  436. }
  437. }
  438. else {
  439. if ((ei = parse_instruction(line, &inst, offset, deftab)))
  440. goto cleanup;
  441. offset += inst->loc.length;
  442. prev_inst->next = inst;
  443. prev_inst = inst;
  444. if ((ei = check_layout(&li, &inst->loc, line)))
  445. goto cleanup;
  446. }
  447. line = line->next;
  448. }
  449. cleanup:
  450. state->instructions = dummy_inst.next;
  451. state->data = dummy_data.next;
  452. free_layout_info(&li);
  453. asm_deftable_free(deftab);
  454. return ei;
  455. }