An emulator, assembler, and disassembler for the Sega Game Gear
You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.
 
 
 
 
 

481 lines
16 KiB

  1. /* Copyright (C) 2014-2015 Ben Kurtovic <ben.kurtovic@gmail.com>
  2. Released under the terms of the MIT License. See LICENSE for details. */
  3. #include <libgen.h>
  4. #include <limits.h>
  5. #include <stdbool.h>
  6. #include <stdint.h>
  7. #include <stdio.h>
  8. #include <stdlib.h>
  9. #include <string.h>
  10. #include "preprocessor.h"
  11. #include "directives.h"
  12. #include "errors.h"
  13. #include "io.h"
  14. #include "parse_util.h"
  15. #include "../logging.h"
  16. #include "../rom.h"
  17. #include "../util.h"
  18. #define MAX_INCLUDE_DEPTH 16
  19. /* Helper macros for preprocess() */
  20. #define FAIL_ON_COND_(cond, err_desc) \
  21. if ((cond)) { \
  22. ei = error_info_create(line, ET_PREPROC, err_desc); \
  23. goto cleanup; \
  24. }
  25. #define CALL_GENERIC_PARSER_(arg_type) \
  26. dparse_##arg_type((arg_type*) &arg, line, directive)
  27. #define CALL_SPECIFIC_PARSER_(arg_type, parser) \
  28. dparse_##parser((arg_type*) &arg, line, directive)
  29. #define DISPATCH_(first, second, target, ...) target
  30. #define CALL_PARSER_(...) \
  31. DISPATCH_(__VA_ARGS__, CALL_SPECIFIC_PARSER_, CALL_GENERIC_PARSER_, \
  32. __VA_ARGS__)(__VA_ARGS__)
  33. #define VALIDATE(func) \
  34. FAIL_ON_COND_(!(func(arg)), ED_PP_BAD_ARG)
  35. #define CHECK_RANGE(bound) \
  36. FAIL_ON_COND_(arg > bound, ED_PP_ARG_RANGE)
  37. #define USE_PARSER(...) \
  38. FAIL_ON_COND_(!CALL_PARSER_(__VA_ARGS__), ED_PP_BAD_ARG)
  39. #define PARSER_BRANCH(arg_type, true_part, false_part) \
  40. if (CALL_PARSER_(arg_type)) {true_part} else {false_part}
  41. #define SAVE_LINE(target) \
  42. if (!dir_is_auto) target = line;
  43. #define BEGIN_DIRECTIVE_BLOCK \
  44. ssize_t first_ctr = -1; \
  45. if (0) {}
  46. #define BEGIN_DIRECTIVE(d, arg_type, dest_loc, auto_val) \
  47. else if (first_ctr++, IS_DIRECTIVE(line, d)) { \
  48. directive = d; \
  49. FAIL_ON_COND_(!DIRECTIVE_HAS_ARG(line, directive), ED_PP_NO_ARG) \
  50. arg_type arg = 0; \
  51. arg_type* dest = &(dest_loc); \
  52. bool dir_is_auto = DIRECTIVE_IS_AUTO(line, directive); \
  53. if (dir_is_auto) { \
  54. arg = auto_val; \
  55. } else {
  56. #define END_DIRECTIVE \
  57. } \
  58. if (firsts[first_ctr] && *dest != arg) { \
  59. ei = error_info_create(line, ET_PREPROC, ED_PP_DUPLICATE); \
  60. error_info_append(ei, firsts[first_ctr]); \
  61. goto cleanup; \
  62. } \
  63. *dest = arg; \
  64. firsts[first_ctr] = line; \
  65. }
  66. #define END_DIRECTIVE_BLOCK \
  67. else FAIL_ON_COND_(true, ED_PP_UNKNOWN)
  68. /*
  69. Functions similar memcpy, but lowercases the characters along the way.
  70. */
  71. static void memcpy_lc(char *restrict dst, const char *restrict src, size_t n)
  72. {
  73. while (n-- > 0) {
  74. char c = *(src++);
  75. if (c >= 'A' && c <= 'Z')
  76. c += 'a' - 'A';
  77. *(dst++) = c;
  78. }
  79. }
  80. /*
  81. Preprocess a single source line for labels.
  82. Return the index of first non-whitespace non-label character. *head_ptr is
  83. updated to the first label in sequence, and *tail_ptr to the last. Both
  84. will be set to NULL if the line doesn't contain labels.
  85. */
  86. static size_t read_labels(
  87. const char *source, size_t length, ASMLine **head_ptr, ASMLine **tail_ptr)
  88. {
  89. size_t start = 0, i, nexti;
  90. while (start < length && (source[start] == ' ' || source[start] == '\t'))
  91. start++;
  92. i = start;
  93. while (i < length && is_valid_symbol_char(source[i], i == start))
  94. i++;
  95. if (i == start || i == length || source[i] != ':') {
  96. *head_ptr = NULL;
  97. *tail_ptr = NULL;
  98. return 0;
  99. }
  100. ASMLine *line = cr_malloc(sizeof(ASMLine));
  101. line->data = cr_malloc(sizeof(char) * (i - start + 1));
  102. memcpy_lc(line->data, source + start, i - start + 1);
  103. line->length = i - start + 1;
  104. line->is_label = true;
  105. nexti = read_labels(source + i + 1, length - i - 1, &line->next, tail_ptr);
  106. *head_ptr = line;
  107. if (!nexti)
  108. *tail_ptr = line;
  109. return i + 1 + nexti;
  110. }
  111. /*
  112. Preprocess a single source line (source, length) into one or more ASMLines.
  113. Only the data, length, is_label, and next fields of the ASMLine objects are
  114. populated. The normalization process strips comments, makes various
  115. adjustments outside of string literals (converts tabs to spaces, lowercases
  116. all alphabetical characters, and removes runs of multiple spaces), among
  117. other things.
  118. Return NULL if an ASM line was not generated from the source, i.e. if it is
  119. blank after being stripped.
  120. */
  121. static ASMLine* normalize_line(const char *source, size_t length)
  122. {
  123. ASMLine *head, *tail;
  124. size_t offset = read_labels(source, length, &head, &tail);
  125. source += offset;
  126. length -= offset;
  127. char *data = cr_malloc(sizeof(char) * length);
  128. size_t si, di, slashes = 0;
  129. bool has_content = false, space_pending = false, in_string = false;
  130. for (si = di = 0; si < length; si++) {
  131. char c = source[si];
  132. if (c == '\\')
  133. slashes++;
  134. else
  135. slashes = 0;
  136. if (in_string) {
  137. if (c == '"' && (slashes % 2) == 0)
  138. in_string = false;
  139. data[di++] = c;
  140. } else {
  141. if (c == ';')
  142. break;
  143. if (c == '"' && (slashes % 2) == 0)
  144. in_string = true;
  145. if (c >= 'A' && c <= 'Z')
  146. c += 'a' - 'A';
  147. if (c == ' ' || c == '\t')
  148. space_pending = true;
  149. else {
  150. if (space_pending) {
  151. if (has_content)
  152. data[di++] = ' ';
  153. space_pending = false;
  154. }
  155. has_content = true;
  156. data[di++] = c;
  157. }
  158. }
  159. }
  160. if (!has_content) {
  161. free(data);
  162. return head;
  163. }
  164. ASMLine *line = cr_malloc(sizeof(ASMLine));
  165. data = cr_realloc(data, sizeof(char) * di);
  166. line->data = data;
  167. line->length = di;
  168. line->is_label = false;
  169. line->next = NULL;
  170. if (head) { // Line has labels, so link the main part up
  171. tail->next = line;
  172. return head;
  173. }
  174. return line;
  175. }
  176. /*
  177. Read and return the target path from an include directive.
  178. This function allocates a buffer to store the filename; it must be free()'d
  179. after calling read_source_file(). If a syntax error occurs while trying to
  180. read the path, it returns NULL.
  181. */
  182. static char* read_include_path(const ASMLine *line)
  183. {
  184. size_t maxlen = strlen(line->filename) + line->length, i, baselen;
  185. if (maxlen >= INT_MAX) // Allows us to safely downcast to int later
  186. return NULL;
  187. char *path = cr_malloc(sizeof(char) * maxlen), *base, *dup;
  188. if (!(i = DIRECTIVE_OFFSET(line, DIR_INCLUDE)))
  189. goto error;
  190. if (line->length - i <= 3) // Not long enough to hold a non-zero argument
  191. goto error;
  192. if (line->data[i++] != ' ')
  193. goto error;
  194. if (!parse_string(&base, &baselen, line->data + i, line->length - i))
  195. goto error;
  196. dup = cr_strdup(line->filename);
  197. // TODO: should normalize filenames in some way to prevent accidental dupes
  198. snprintf(path, maxlen, "%s/%.*s", dirname(dup), (int) baselen, base);
  199. free(dup);
  200. free(base);
  201. return path;
  202. error:
  203. free(path);
  204. return NULL;
  205. }
  206. /*
  207. Build a LineBuffer into a ASMLines, normalizing them along the way.
  208. This function operates recursively to handle includes, but handles no other
  209. preprocessor directives.
  210. On success, NULL is returned; *head points to the head of the new ASMLine
  211. list, and *tail to its tail (assuming it is non-NULL). On error, an
  212. ErrorInfo object is returned, and *head and *tail are not modified.
  213. *includes may be updated in either case.
  214. */
  215. static ErrorInfo* build_asm_lines(
  216. const LineBuffer *source, ASMLine **head, ASMLine **tail,
  217. ASMInclude **includes, unsigned depth)
  218. {
  219. ErrorInfo *ei;
  220. ASMLine dummy = {.next = NULL};
  221. ASMLine *line, *prev = &dummy, *temp;
  222. const Line *orig, *next_orig = source->lines;
  223. while ((orig = next_orig)) {
  224. line = temp = normalize_line(orig->data, orig->length);
  225. next_orig = orig->next;
  226. if (!line)
  227. continue;
  228. // Populate ASMLine fields not set by normalize_line():
  229. while (temp) {
  230. temp->original = orig;
  231. temp->filename = source->filename;
  232. temp = temp->next;
  233. }
  234. // If there are multiple ASMLines, all but the last must be labels:
  235. if (line->next) {
  236. while (line->next) {
  237. prev->next = line;
  238. prev = line;
  239. line = line->next;
  240. }
  241. prev->next = NULL; // Disconnect in case the line is an .include
  242. }
  243. if (IS_DIRECTIVE(line, DIR_INCLUDE)) {
  244. char *path = read_include_path(line);
  245. if (!path) {
  246. ei = error_info_create(line, ET_INCLUDE, ED_INC_BAD_ARG);
  247. goto error;
  248. }
  249. if (depth >= MAX_INCLUDE_DEPTH) {
  250. free(path);
  251. ei = error_info_create(line, ET_INCLUDE, ED_INC_DEPTH);
  252. goto error;
  253. }
  254. DEBUG("- reading included file: %s", path)
  255. LineBuffer *incbuffer = read_source_file(path, false);
  256. free(path);
  257. if (!incbuffer) {
  258. ei = error_info_create(line, ET_INCLUDE, ED_INC_FILE_READ);
  259. goto error;
  260. }
  261. ASMInclude *include = cr_malloc(sizeof(ASMInclude));
  262. include->lines = incbuffer;
  263. include->next = *includes;
  264. *includes = include;
  265. ASMLine *inchead, *inctail;
  266. if ((ei = build_asm_lines(incbuffer, &inchead, &inctail, includes,
  267. depth + 1))) {
  268. error_info_append(ei, line);
  269. goto error;
  270. }
  271. prev->next = inchead;
  272. prev = inctail;
  273. asm_lines_free(line); // Destroy only the .include line
  274. }
  275. else {
  276. prev->next = line;
  277. prev = line;
  278. }
  279. }
  280. *head = dummy.next;
  281. if (tail)
  282. *tail = prev;
  283. return NULL;
  284. error:
  285. asm_lines_free(line);
  286. asm_lines_free(dummy.next);
  287. return ei;
  288. }
  289. /*
  290. Return whether the given ROM size is valid.
  291. */
  292. static inline bool is_rom_size_valid(size_t size)
  293. {
  294. return size_bytes_to_code(size) != INVALID_SIZE_CODE;
  295. }
  296. /*
  297. Return whether the given header offset is a valid location.
  298. */
  299. static inline bool is_header_offset_valid(uint16_t offset)
  300. {
  301. return offset == 0x7FF0 || offset == 0x3FF0 || offset == 0x1FF0;
  302. }
  303. /*
  304. Preprocess the LineBuffer into ASMLines. Change some state along the way.
  305. This function processes include directives, so read_source_file() may be
  306. called multiple times (along with the implications that has), and
  307. state->includes may be modified.
  308. On success, NULL is returned. On error, an ErrorInfo object is returned.
  309. state->lines and state->includes may still be modified.
  310. */
  311. ErrorInfo* preprocess(AssemblerState *state, const LineBuffer *source)
  312. {
  313. ErrorInfo* ei = NULL;
  314. DEBUG("Running preprocessor")
  315. if ((ei = build_asm_lines(source, &state->lines, NULL, &state->includes, 0)))
  316. return ei;
  317. const ASMLine *firsts[NUM_DIRECTIVES];
  318. for (size_t i = 0; i < NUM_DIRECTIVES; i++)
  319. firsts[i] = NULL;
  320. ASMLine dummy = {.next = state->lines};
  321. ASMLine *prev, *line = &dummy, *next = state->lines, *condemned = NULL;
  322. const ASMLine *rom_size_line = NULL, *rom_declsize_line = NULL;
  323. const char *directive;
  324. while ((prev = line, line = next)) {
  325. next = line->next;
  326. if (line->is_label || line->data[0] != DIRECTIVE_MARKER)
  327. continue;
  328. if (IS_LOCAL_DIRECTIVE(line))
  329. continue; // "Local" directives are handled by the tokenizer
  330. DEBUG("- handling directive: %.*s", (int) line->length, line->data)
  331. BEGIN_DIRECTIVE_BLOCK
  332. BEGIN_DIRECTIVE(DIR_ROM_SIZE, size_t, state->rom_size, 0)
  333. PARSER_BRANCH(uint32_t, {}, {
  334. USE_PARSER(uint32_t, rom_size)
  335. })
  336. VALIDATE(is_rom_size_valid)
  337. SAVE_LINE(rom_size_line)
  338. END_DIRECTIVE
  339. BEGIN_DIRECTIVE(DIR_ROM_HEADER, size_t, state->header.offset, DEFAULT_HEADER_OFFSET)
  340. USE_PARSER(uint16_t)
  341. VALIDATE(is_header_offset_valid)
  342. END_DIRECTIVE
  343. BEGIN_DIRECTIVE(DIR_ROM_CHECKSUM, bool, state->header.checksum, true)
  344. USE_PARSER(bool)
  345. END_DIRECTIVE
  346. BEGIN_DIRECTIVE(DIR_ROM_PRODUCT, uint32_t, state->header.product_code, 0)
  347. USE_PARSER(uint32_t)
  348. CHECK_RANGE(160000)
  349. END_DIRECTIVE
  350. BEGIN_DIRECTIVE(DIR_ROM_VERSION, uint8_t, state->header.version, 0)
  351. USE_PARSER(uint8_t)
  352. CHECK_RANGE(0x10)
  353. END_DIRECTIVE
  354. BEGIN_DIRECTIVE(DIR_ROM_REGION, uint8_t, state->header.region, DEFAULT_REGION)
  355. PARSER_BRANCH(uint8_t, {
  356. CHECK_RANGE(0x10)
  357. VALIDATE(region_code_to_string)
  358. }, {
  359. USE_PARSER(uint8_t, region_string)
  360. })
  361. END_DIRECTIVE
  362. BEGIN_DIRECTIVE(DIR_ROM_DECLSIZE, uint8_t, state->header.rom_size, DEFAULT_DECLSIZE)
  363. PARSER_BRANCH(uint8_t, {
  364. CHECK_RANGE(0x10)
  365. VALIDATE(size_code_to_bytes)
  366. }, {
  367. USE_PARSER(uint8_t, size_code)
  368. })
  369. SAVE_LINE(rom_declsize_line)
  370. END_DIRECTIVE
  371. BEGIN_DIRECTIVE(DIR_CROSS_BLOCKS, bool, state->cross_blocks, false)
  372. USE_PARSER(bool)
  373. END_DIRECTIVE
  374. END_DIRECTIVE_BLOCK
  375. // Remove directive from lines, and schedule it for deletion:
  376. line->next = condemned;
  377. condemned = line;
  378. prev->next = next;
  379. line = prev;
  380. }
  381. if (rom_size_line && state->header.offset + HEADER_SIZE > state->rom_size) {
  382. // TODO: maybe should force offset to be explicit, otherwise autofix
  383. ei = error_info_create(rom_size_line, ET_LAYOUT, ED_LYT_HEADER_RANGE);
  384. goto cleanup;
  385. }
  386. if (rom_size_line && rom_declsize_line &&
  387. size_code_to_bytes(state->header.rom_size) > state->rom_size) {
  388. ei = error_info_create(rom_size_line, ET_LAYOUT, ED_LYT_DECL_RANGE);
  389. error_info_append(ei, rom_declsize_line);
  390. goto cleanup;
  391. }
  392. if (!rom_declsize_line) // Mark as undefined, for resolve_defaults()
  393. state->header.rom_size = INVALID_SIZE_CODE;
  394. cleanup:
  395. asm_lines_free(condemned);
  396. state->lines = dummy.next; // Fix list head if first line was a directive
  397. return ei;
  398. }