An emulator, assembler, and disassembler for the Sega Game Gear
Du kannst nicht mehr als 25 Themen auswählen Themen müssen entweder mit einem Buchstaben oder einer Ziffer beginnen. Sie können Bindestriche („-“) enthalten und bis zu 35 Zeichen lang sein.
 
 
 
 
 

433 Zeilen
14 KiB

  1. /* Copyright (C) 2014-2015 Ben Kurtovic <ben.kurtovic@gmail.com>
  2. Released under the terms of the MIT License. See LICENSE for details. */
  3. #include <libgen.h>
  4. #include <limits.h>
  5. #include <stdbool.h>
  6. #include <stdint.h>
  7. #include <stdio.h>
  8. #include <stdlib.h>
  9. #include <string.h>
  10. #include "preprocessor.h"
  11. #include "directives.h"
  12. #include "errors.h"
  13. #include "io.h"
  14. #include "parse_util.h"
  15. #include "../logging.h"
  16. #include "../util.h"
  /*
      Helper macros for preprocess().

      These macros are not hygienic: they expect the following names to be in
      scope at the point of use: `line` (the ASMLine being examined), `ei` (the
      ErrorInfo* to fill in on failure), `directive`, `firsts` (the array of
      first-seen lines, indexed by directive order), and a `cleanup:` label to
      jump to on error. Inside a BEGIN_DIRECTIVE/END_DIRECTIVE pair, `arg` is
      the parsed argument and `dest` points to where it will be stored.

      None of the statement-like macros take a trailing semicolon.
  */

  /* Record a preprocessor error and bail out to `cleanup` if cond holds. */
  #define FAIL_ON_COND_(cond, err_desc) \
      if ((cond)) { \
          ei = error_info_create(line, ET_PREPROC, err_desc); \
          goto cleanup; \
      }

  /* Call parse_<arg_type>(&arg, line, directive). */
  #define CALL_GENERIC_PARSER_(arg_type) \
      parse_##arg_type((arg_type*) &arg, line, directive)

  /* Call parse_<parser>(&arg, line), treating arg as an arg_type. */
  #define CALL_SPECIFIC_PARSER_(arg_type, parser) \
      parse_##parser((arg_type*) &arg, line)

  /* Select the third argument; used to dispatch on argument count. */
  #define DISPATCH_(first, second, target, ...) target

  /* One argument -> generic parser; two arguments -> specific parser. */
  #define CALL_PARSER_(...) \
      DISPATCH_(__VA_ARGS__, CALL_SPECIFIC_PARSER_, CALL_GENERIC_PARSER_, \
                __VA_ARGS__)(__VA_ARGS__)

  /* Fail with ED_PP_BAD_ARG unless func(arg) is truthy. */
  #define VALIDATE(func) \
      FAIL_ON_COND_(!(func(arg)), ED_PP_BAD_ARG)

  /* Fail with ED_PP_ARG_RANGE if arg exceeds bound. */
  #define CHECK_RANGE(bound) \
      FAIL_ON_COND_(arg > (bound), ED_PP_ARG_RANGE)

  /* Parse the directive's argument into arg, failing with ED_PP_BAD_ARG. */
  #define USE_PARSER(...) \
      FAIL_ON_COND_(!CALL_PARSER_(__VA_ARGS__), ED_PP_BAD_ARG)

  /* Try the generic parser; run true_part if it succeeds, else false_part. */
  #define PARSER_BRANCH(arg_type, true_part, false_part) \
      if (CALL_PARSER_(arg_type)) {true_part} else {false_part}

  /* Open an if/else-if chain of directives; first_ctr indexes into firsts. */
  #define BEGIN_DIRECTIVE_BLOCK \
      ssize_t first_ctr = -1; \
      if (0) {}

  /*
      Begin handling directive d, whose argument has type arg_type and is
      stored in dest_loc. If the directive is given its "auto" value, auto_val
      is used and the body between BEGIN_DIRECTIVE and END_DIRECTIVE is skipped.

      arg is zero-initialized because the body may fill it through a pointer to
      a narrower type (e.g. a size_t arg parsed as uint32_t or uint16_t); the
      bytes the parser does not write would otherwise be indeterminate when arg
      is later compared and stored.
  */
  #define BEGIN_DIRECTIVE(d, arg_type, dest_loc, auto_val) \
      else if (first_ctr++, IS_DIRECTIVE(line, d)) { \
          directive = d; \
          FAIL_ON_COND_(!DIRECTIVE_HAS_ARG(line, directive), ED_PP_NO_ARG) \
          arg_type arg = 0; \
          arg_type* dest = &(dest_loc); \
          if (DIRECTIVE_IS_AUTO(line, directive)) { \
              arg = auto_val; \
          } else {

  /*
      Finish handling a directive: reject a repeated directive whose value
      differs from the one already stored, then store arg and remember this
      line as the directive's first occurrence.
  */
  #define END_DIRECTIVE \
          } \
          if (firsts[first_ctr] && *dest != arg) { \
              ei = error_info_create(line, ET_PREPROC, ED_PP_DUPLICATE); \
              error_info_append(ei, firsts[first_ctr]); \
              goto cleanup; \
          } \
          *dest = arg; \
          firsts[first_ctr] = line; \
      }

  /* Close the chain: anything not matched above is an unknown directive. */
  #define END_DIRECTIVE_BLOCK \
      else FAIL_ON_COND_(true, ED_PP_UNKNOWN)
  63. /*
  64. Preprocess a single source line (source, length) into a normalized ASMLine.
  65. *Only* the data and length fields in the ASMLine object are populated. The
  66. normalization process converts tabs to spaces, lowercases all alphabetical
  67. characters, and removes runs of multiple spaces (outside of string
  68. literals), strips comments, and other things.
  69. Return NULL if an ASM line was not generated from the source, i.e. if it is
  70. blank after being stripped.
  71. */
  72. static ASMLine* normalize_line(const char *source, size_t length)
  73. {
  74. char *data = malloc(sizeof(char) * length);
  75. if (!data)
  76. OUT_OF_MEMORY()
  77. size_t si, di, slashes = 0;
  78. bool has_content = false, space_pending = false, in_string = false;
  79. for (si = di = 0; si < length; si++) {
  80. char c = source[si];
  81. if (c == '\\')
  82. slashes++;
  83. else
  84. slashes = 0;
  85. if (in_string) {
  86. if (c == '"' && (slashes % 2) == 0)
  87. in_string = false;
  88. data[di++] = c;
  89. } else {
  90. if (c == ';')
  91. break;
  92. if (c == '"' && (slashes % 2) == 0)
  93. in_string = true;
  94. if (c >= 'A' && c <= 'Z')
  95. c += 'a' - 'A';
  96. if (c == ' ' || c == '\t')
  97. space_pending = true;
  98. else {
  99. if (space_pending) {
  100. if (has_content)
  101. data[di++] = ' ';
  102. space_pending = false;
  103. }
  104. has_content = true;
  105. data[di++] = c;
  106. }
  107. }
  108. }
  109. if (!has_content) {
  110. free(data);
  111. return NULL;
  112. }
  113. ASMLine *line = malloc(sizeof(ASMLine));
  114. if (!line)
  115. OUT_OF_MEMORY()
  116. data = realloc(data, sizeof(char) * di);
  117. if (!data)
  118. OUT_OF_MEMORY()
  119. line->data = data;
  120. line->length = di;
  121. return line;
  122. }
  123. /*
  124. Read and return the target path from an include directive.
  125. This function allocates a buffer to store the filename; it must be free()'d
  126. after calling read_source_file(). If a syntax error occurs while trying to
  127. read the path, it returns NULL.
  128. */
  129. static char* read_include_path(const ASMLine *line)
  130. {
  131. size_t maxlen = strlen(line->filename) + line->length, i, start, slashes;
  132. if (maxlen >= INT_MAX) // Allows us to safely downcast to int later
  133. return NULL;
  134. char *path = malloc(sizeof(char) * maxlen);
  135. if (!path)
  136. OUT_OF_MEMORY()
  137. if (!(i = DIRECTIVE_OFFSET(line, DIR_INCLUDE)))
  138. goto error;
  139. if (line->length - i <= 4) // Not long enough to hold a non-zero argument
  140. goto error;
  141. if (line->data[i++] != ' ' || line->data[i++] != '"')
  142. goto error;
  143. // TODO: parse escaped characters properly
  144. for (start = i, slashes = 0; i < line->length; i++) {
  145. if (line->data[i] == '"' && (slashes % 2) == 0)
  146. break;
  147. if (line->data[i] == '\\')
  148. slashes++;
  149. else
  150. slashes = 0;
  151. }
  152. if (i != line->length - 1) // Junk present after closing quote
  153. goto error;
  154. char *dup = strdup(line->filename);
  155. if (!dup)
  156. OUT_OF_MEMORY()
  157. // TODO: should normalize filenames in some way to prevent accidental dupes
  158. snprintf(path, maxlen, "%s/%.*s", dirname(dup), (int) (i - start),
  159. line->data + start);
  160. free(dup);
  161. return path;
  162. error:
  163. free(path);
  164. return NULL;
  165. }
  166. /*
  167. Return whether the given path has already been loaded.
  168. */
  169. static bool path_has_been_loaded(
  170. const char *path, const LineBuffer *root, const ASMInclude *include)
  171. {
  172. if (!strcmp(path, root->filename))
  173. return true;
  174. while (include) {
  175. if (!strcmp(path, include->lines->filename))
  176. return true;
  177. include = include->next;
  178. }
  179. return false;
  180. }
  181. /*
  182. Build a LineBuffer into a ASMLines, normalizing them along the way.
  183. This function operates recursively to handle includes, but handles no other
  184. preprocessor directives.
  185. On success, NULL is returned; *head points to the head of the new ASMLine
  186. list, and *tail to its tail (assuming it is non-NULL). On error, an
  187. ErrorInfo object is returned, and *head and *tail are not modified.
  188. *includes may be updated in either case.
  189. */
  190. static ErrorInfo* build_asm_lines(
  191. const LineBuffer *root, const LineBuffer *source, ASMLine **head,
  192. ASMLine **tail, ASMInclude **includes)
  193. {
  194. ASMLine dummy = {.next = NULL};
  195. ASMLine *line, *prev = &dummy;
  196. const Line *orig, *next_orig = source->lines;
  197. while ((orig = next_orig)) {
  198. line = normalize_line(orig->data, orig->length);
  199. next_orig = orig->next;
  200. if (!line)
  201. continue;
  202. // Populate ASMLine fields not set by normalize_line():
  203. line->original = orig;
  204. line->filename = source->filename;
  205. line->next = NULL;
  206. if (IS_DIRECTIVE(line, DIR_INCLUDE)) {
  207. ErrorInfo *ei;
  208. char *path = read_include_path(line);
  209. if (!path) {
  210. ei = error_info_create(line, ET_INCLUDE, ED_INC_BAD_ARG);
  211. asm_lines_free(line);
  212. asm_lines_free(dummy.next);
  213. return ei;
  214. }
  215. if (path_has_been_loaded(path, root, *includes)) {
  216. ei = error_info_create(line, ET_INCLUDE, ED_INC_RECURSION);
  217. asm_lines_free(line);
  218. asm_lines_free(dummy.next);
  219. free(path);
  220. return ei;
  221. }
  222. DEBUG("- reading included file: %s", path)
  223. LineBuffer *incbuffer = read_source_file(path, false);
  224. free(path);
  225. if (!incbuffer) {
  226. ei = error_info_create(line, ET_INCLUDE, ED_INC_FILE_READ);
  227. asm_lines_free(line);
  228. asm_lines_free(dummy.next);
  229. return ei;
  230. }
  231. ASMInclude *include = malloc(sizeof(ASMInclude));
  232. if (!include)
  233. OUT_OF_MEMORY()
  234. include->lines = incbuffer;
  235. include->next = *includes;
  236. *includes = include;
  237. ASMLine *inchead, *inctail;
  238. if ((ei = build_asm_lines(root, incbuffer, &inchead, &inctail,
  239. includes))) {
  240. error_info_append(ei, line);
  241. asm_lines_free(line);
  242. asm_lines_free(dummy.next);
  243. return ei;
  244. }
  245. prev->next = inchead;
  246. prev = inctail;
  247. asm_lines_free(line); // Destroy only the .include line
  248. }
  249. else {
  250. prev->next = line;
  251. prev = line;
  252. }
  253. }
  254. *head = dummy.next;
  255. if (tail)
  256. *tail = prev;
  257. return NULL;
  258. }
  /*
      Tell whether offset is one of the accepted header locations
      (0x1FF0, 0x3FF0, or 0x7FF0).
  */
  static inline bool is_header_offset_valid(uint16_t offset)
  {
      switch (offset) {
      case 0x1FF0:
      case 0x3FF0:
      case 0x7FF0:
          return true;
      default:
          return false;
      }
  }
  266. /*
  267. Preprocess the LineBuffer into ASMLines. Change some state along the way.
  268. This function processes include directives, so read_source_file() may be
  269. called multiple times (along with the implications that has), and
  270. state->includes may be modified.
  271. On success, NULL is returned. On error, an ErrorInfo object is returned.
  272. state->lines and state->includes may still be modified.
  273. */
  274. ErrorInfo* preprocess(AssemblerState *state, const LineBuffer *source)
  275. {
  276. ErrorInfo* ei = NULL;
  277. DEBUG("Running preprocessor:")
  278. if ((ei = build_asm_lines(source, source, &state->lines, NULL,
  279. &state->includes)))
  280. return ei;
  281. const ASMLine *firsts[NUM_DIRECTIVES];
  282. for (size_t i = 0; i < NUM_DIRECTIVES; i++)
  283. firsts[i] = NULL;
  284. ASMLine dummy = {.next = state->lines};
  285. ASMLine *prev, *line = &dummy, *next = state->lines, *condemned = NULL;
  286. const char *directive;
  287. while ((prev = line, line = next)) {
  288. next = line->next;
  289. if (line->data[0] != DIRECTIVE_MARKER)
  290. continue;
  291. if (IS_DIRECTIVE(line, DIR_ORIGIN))
  292. continue; // Origins are handled by tokenizer
  293. DEBUG("- handling directive: %.*s", (int) line->length, line->data)
  294. BEGIN_DIRECTIVE_BLOCK
  295. BEGIN_DIRECTIVE(DIR_OPTIMIZER, bool, state->optimizer, false)
  296. USE_PARSER(bool)
  297. END_DIRECTIVE
  298. BEGIN_DIRECTIVE(DIR_ROM_SIZE, size_t, state->rom_size, 0)
  299. USE_PARSER(uint32_t, rom_size)
  300. END_DIRECTIVE
  301. BEGIN_DIRECTIVE(DIR_ROM_HEADER, size_t, state->header.offset, DEFAULT_HEADER_OFFSET)
  302. USE_PARSER(uint16_t)
  303. VALIDATE(is_header_offset_valid)
  304. END_DIRECTIVE
  305. BEGIN_DIRECTIVE(DIR_ROM_CHECKSUM, bool, state->header.checksum, true)
  306. USE_PARSER(bool)
  307. END_DIRECTIVE
  308. BEGIN_DIRECTIVE(DIR_ROM_PRODUCT, uint32_t, state->header.product_code, 0)
  309. USE_PARSER(uint32_t)
  310. CHECK_RANGE(160000)
  311. END_DIRECTIVE
  312. BEGIN_DIRECTIVE(DIR_ROM_VERSION, uint8_t, state->header.version, 0)
  313. USE_PARSER(uint8_t)
  314. CHECK_RANGE(0x10)
  315. END_DIRECTIVE
  316. BEGIN_DIRECTIVE(DIR_ROM_REGION, uint8_t, state->header.region, DEFAULT_REGION)
  317. PARSER_BRANCH(uint8_t, {
  318. CHECK_RANGE(0x10)
  319. VALIDATE(region_code_to_string)
  320. }, {
  321. USE_PARSER(uint8_t, region_string)
  322. })
  323. END_DIRECTIVE
  324. BEGIN_DIRECTIVE(DIR_ROM_DECLSIZE, uint8_t, state->header.rom_size, 0)
  325. PARSER_BRANCH(uint8_t, {
  326. CHECK_RANGE(0x10)
  327. VALIDATE(size_code_to_bytes)
  328. }, {
  329. USE_PARSER(uint8_t, size_code)
  330. })
  331. END_DIRECTIVE
  332. END_DIRECTIVE_BLOCK
  333. // Remove directive from lines, and schedule it for deletion:
  334. line->next = condemned;
  335. condemned = line;
  336. prev->next = next;
  337. line = prev;
  338. }
  339. // TODO: if giving rom size, check header offset is in rom size range
  340. // TODO: if giving reported and actual rom size, check reported is <= actual
  341. state->rom_size = 8; // TODO
  342. cleanup:
  343. asm_lines_free(condemned);
  344. state->lines = dummy.next; // Fix list head if first line was a directive
  345. #ifdef DEBUG_MODE
  346. DEBUG("Dumping ASMLines:")
  347. const ASMLine *temp = state->lines;
  348. while (temp) {
  349. DEBUG("- %-40.*s [%s:%02zu]", (int) temp->length, temp->data,
  350. temp->filename, temp->original->lineno)
  351. temp = temp->next;
  352. }
  353. #endif
  354. return ei;
  355. }