An emulator, assembler, and disassembler for the Sega Game Gear
You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.
 
 
 
 
 

510 lines
16 KiB

  1. /* Copyright (C) 2014-2015 Ben Kurtovic <ben.kurtovic@gmail.com>
  2. Released under the terms of the MIT License. See LICENSE for details. */
  3. #include <libgen.h>
  4. #include <limits.h>
  5. #include <stdint.h>
  6. #include <stdio.h>
  7. #include <stdlib.h>
  8. #include <string.h>
  9. #include "preprocessor.h"
  10. #include "errors.h"
  11. #include "io.h"
  12. #include "../logging.h"
  /* Character that every preprocessor directive line starts with. */
  #define DIRECTIVE_MARKER '.'

  /* Directive keywords, compared against normalized line data. */
  #define DIR_INCLUDE      ".include"
  #define DIR_ORIGIN       ".org"
  #define DIR_OPTIMIZER    ".optimizer"
  #define DIR_ROM_SIZE     ".rom_size"
  #define DIR_ROM_HEADER   ".rom_header"
  #define DIR_ROM_CHECKSUM ".rom_checksum"
  #define DIR_ROM_PRODUCT  ".rom_product"
  #define DIR_ROM_VERSION  ".rom_version"
  #define DIR_ROM_REGION   ".rom_region"
  #define DIR_ROM_DECLSIZE ".rom_declsize"

  /* True if the line is strictly longer than the directive keyword d. */
  #define DIRECTIVE_HAS_ARG(line, d) (strlen(d) < (line)->length)

  /*
      True if the line starts with the directive keyword d, and the keyword is
      either the whole line or is followed immediately by a single space.
  */
  #define IS_DIRECTIVE(line, d)                                                 \
      ((strlen(d) <= (line)->length) &&                                         \
       (strncmp((line)->data, d, strlen(d)) == 0) &&                            \
       (!DIRECTIVE_HAS_ARG(line, d) || ' ' == (line)->data[strlen(d)]))

  /*
      Index of the first character after the directive keyword d if the line
      has anything following it; 0 if the line has no argument.
  */
  #define DIRECTIVE_OFFSET(line, d)                                             \
      (DIRECTIVE_HAS_ARG(line, d) ? strlen(d) : 0)
  31. /*
  32. Preprocess a single source line (source, length) into a normalized ASMLine.
  33. *Only* the data and length fields in the ASMLine object are populated. The
  34. normalization process converts tabs to spaces, lowercases all alphabetical
  35. characters, and removes runs of multiple spaces (outside of string
  36. literals), strips comments, and other things.
  37. Return NULL if an ASM line was not generated from the source, i.e. if it is
  38. blank after being stripped.
  39. */
  40. static ASMLine* normalize_line(const char *source, size_t length)
  41. {
  42. char *data = malloc(sizeof(char) * length);
  43. if (!data)
  44. OUT_OF_MEMORY()
  45. size_t si, di, slashes = 0;
  46. bool has_content = false, space_pending = false, in_string = false;
  47. for (si = di = 0; si < length; si++) {
  48. char c = source[si];
  49. if (c == '\\')
  50. slashes++;
  51. else
  52. slashes = 0;
  53. if (in_string) {
  54. if (c == '"' && (slashes % 2) == 0)
  55. in_string = false;
  56. data[di++] = c;
  57. } else {
  58. if (c == ';')
  59. break;
  60. if (c == '"' && (slashes % 2) == 0)
  61. in_string = true;
  62. if (c >= 'A' && c <= 'Z')
  63. c += 'a' - 'A';
  64. if (c == ' ' || c == '\t')
  65. space_pending = true;
  66. else {
  67. if (space_pending) {
  68. if (has_content)
  69. data[di++] = ' ';
  70. space_pending = false;
  71. }
  72. has_content = true;
  73. data[di++] = c;
  74. }
  75. }
  76. }
  77. if (!has_content) {
  78. free(data);
  79. return NULL;
  80. }
  81. ASMLine *line = malloc(sizeof(ASMLine));
  82. if (!line)
  83. OUT_OF_MEMORY()
  84. data = realloc(data, sizeof(char) * di);
  85. if (!data)
  86. OUT_OF_MEMORY()
  87. line->data = data;
  88. line->length = di;
  89. return line;
  90. }
  91. /*
  92. Read and return the target path from an include directive.
  93. This function allocates a buffer to store the filename; it must be free()'d
  94. after calling read_source_file(). If a syntax error occurs while trying to
  95. read the path, it returns NULL.
  96. */
  97. char* read_include_path(const ASMLine *line)
  98. {
  99. size_t maxlen = strlen(line->filename) + line->length, i, start, slashes;
  100. if (maxlen >= INT_MAX) // Allows us to safely downcast to int later
  101. return NULL;
  102. char *path = malloc(sizeof(char) * maxlen);
  103. if (!path)
  104. OUT_OF_MEMORY()
  105. if (!(i = DIRECTIVE_OFFSET(line, DIR_INCLUDE)))
  106. goto error;
  107. if (line->length - i <= 4) // Not long enough to hold a non-zero argument
  108. goto error;
  109. if (line->data[i++] != ' ' || line->data[i++] != '"')
  110. goto error;
  111. // TODO: parse escaped characters properly
  112. for (start = i, slashes = 0; i < line->length; i++) {
  113. if (line->data[i] == '"' && (slashes % 2) == 0)
  114. break;
  115. if (line->data[i] == '\\')
  116. slashes++;
  117. else
  118. slashes = 0;
  119. }
  120. if (i != line->length - 1) // Junk present after closing quote
  121. goto error;
  122. char *dup = strdup(line->filename);
  123. if (!dup)
  124. OUT_OF_MEMORY()
  125. // TODO: should normalize filenames in some way to prevent accidental dupes
  126. snprintf(path, maxlen, "%s/%.*s", dirname(dup), (int) (i - start),
  127. line->data + start);
  128. free(dup);
  129. return path;
  130. error:
  131. free(path);
  132. return NULL;
  133. }
  134. /*
  135. Return whether the given path has already been loaded.
  136. */
  137. static bool path_has_been_loaded(
  138. const char *path, const LineBuffer *root, const ASMInclude *include)
  139. {
  140. if (!strcmp(path, root->filename))
  141. return true;
  142. while (include) {
  143. if (!strcmp(path, include->lines->filename))
  144. return true;
  145. include = include->next;
  146. }
  147. return false;
  148. }
  149. /*
  150. Build a LineBuffer into a ASMLines, normalizing them along the way.
  151. This function operates recursively to handle includes, but handles no other
  152. preprocessor directives.
  153. On success, NULL is returned; *head points to the head of the new ASMLine
  154. list, and *tail to its tail (assuming it is non-NULL). On error, an
  155. ErrorInfo object is returned, and *head and *tail are not modified.
  156. *includes may be updated in either case.
  157. */
  158. static ErrorInfo* build_asm_lines(
  159. const LineBuffer *root, const LineBuffer *source, ASMLine **head,
  160. ASMLine **tail, ASMInclude **includes)
  161. {
  162. ASMLine dummy = {.next = NULL};
  163. ASMLine *line, *prev = &dummy;
  164. const Line *orig, *next_orig = source->lines;
  165. while ((orig = next_orig)) {
  166. line = normalize_line(orig->data, orig->length);
  167. next_orig = orig->next;
  168. if (!line)
  169. continue;
  170. // Populate ASMLine fields not set by normalize_line():
  171. line->original = orig;
  172. line->filename = source->filename;
  173. line->next = NULL;
  174. if (IS_DIRECTIVE(line, DIR_INCLUDE)) {
  175. ErrorInfo *ei;
  176. char *path = read_include_path(line);
  177. if (!path) {
  178. ei = error_info_create(line, ET_INCLUDE, ED_INC_BAD_ARG);
  179. asm_lines_free(line);
  180. asm_lines_free(dummy.next);
  181. return ei;
  182. }
  183. if (path_has_been_loaded(path, root, *includes)) {
  184. ei = error_info_create(line, ET_INCLUDE, ED_INC_RECURSION);
  185. asm_lines_free(line);
  186. asm_lines_free(dummy.next);
  187. free(path);
  188. return ei;
  189. }
  190. DEBUG("- reading included file: %s", path)
  191. LineBuffer *incbuffer = read_source_file(path, false);
  192. free(path);
  193. if (!incbuffer) {
  194. ei = error_info_create(line, ET_INCLUDE, ED_INC_FILE_READ);
  195. asm_lines_free(line);
  196. asm_lines_free(dummy.next);
  197. return ei;
  198. }
  199. ASMInclude *include = malloc(sizeof(ASMInclude));
  200. if (!include)
  201. OUT_OF_MEMORY()
  202. include->lines = incbuffer;
  203. include->next = *includes;
  204. *includes = include;
  205. ASMLine *inchead, *inctail;
  206. if ((ei = build_asm_lines(root, incbuffer, &inchead, &inctail,
  207. includes))) {
  208. error_info_append(ei, line);
  209. asm_lines_free(line);
  210. asm_lines_free(dummy.next);
  211. return ei;
  212. }
  213. prev->next = inchead;
  214. prev = inctail;
  215. asm_lines_free(line); // Destroy only the .include line
  216. }
  217. else {
  218. prev->next = line;
  219. prev = line;
  220. }
  221. }
  222. *head = dummy.next;
  223. if (tail)
  224. *tail = prev;
  225. return NULL;
  226. }
  227. /*
  228. Read in a boolean argument from the given line and store it in *result.
  229. auto_val is used if the argument's value is "auto". Return true on success
  230. and false on failure; in the latter case, *result is not modified.
  231. */
  232. static inline bool read_bool_argument(
  233. bool *result, const ASMLine *line, const char *directive, bool auto_val)
  234. {
  235. const char *arg = line->data + (DIRECTIVE_OFFSET(line, directive) + 1);
  236. ssize_t len = line->length - (DIRECTIVE_OFFSET(line, directive) + 1);
  237. if (len <= 0 || len > 5)
  238. return false;
  239. switch (len) {
  240. case 1: // 0, 1
  241. if (*arg == '0' || *arg == '1')
  242. return (*result = *arg - '0'), true;
  243. return false;
  244. case 2: // on
  245. if (!strncmp(arg, "on", 2))
  246. return (*result = true), true;
  247. return false;
  248. case 3: // off
  249. if (!strncmp(arg, "off", 3))
  250. return (*result = false), true;
  251. return false;
  252. case 4: // true, auto
  253. if (!strncmp(arg, "true", 4))
  254. return (*result = true), true;
  255. if (!strncmp(arg, "auto", 4))
  256. return (*result = auto_val), true;
  257. return false;
  258. case 5: // false
  259. if (!strncmp(arg, "false", 5))
  260. return (*result = false), true;
  261. return false;
  262. }
  263. return false;
  264. }
  /*
      Read in an integer starting at str and ending the character before end.

      The text is parsed as decimal digits, or, if it begins with '$', as
      lowercase hexadecimal digits following the '$'. The whole range
      [str, end) must be consumed, and the value must fit in 32 bits
      (0 through UINT32_MAX inclusive).

      Store the value in *result and return true on success; else return false
      and leave *result untouched.
  */
  static inline bool read_integer(
      uint32_t *result, const char *str, const char *end)
  {
      if (end - str <= 0)
          return false;

      unsigned base = 10;
      if (*str == '$') {
          base = 16;
          if (++str == end)  // A lone '$' has no digits
              return false;
      }

      // value is at most UINT32_MAX before each step, so value * base + digit
      // cannot overflow 64 bits.
      uint64_t value = 0;
      for (; str < end; str++) {
          unsigned digit;
          if (*str >= '0' && *str <= '9')
              digit = (unsigned) (*str - '0');
          else if (base == 16 && *str >= 'a' && *str <= 'f')
              digit = 0xA + (unsigned) (*str - 'a');
          else
              return false;

          value = value * base + digit;
          if (value > UINT32_MAX)  // UINT32_MAX itself is representable
              return false;
      }

      *result = (uint32_t) value;
      return true;
  }
  304. /*
  305. Read in a 32-bit int argument from the given line and store it in *result.
  306. Return true on success and false on failure; in the latter case, *result is
  307. not modified.
  308. */
  309. static inline bool read_uint32_argument(
  310. uint32_t *result, const ASMLine *line, const char *directive)
  311. {
  312. const char *arg = line->data + (DIRECTIVE_OFFSET(line, directive) + 1);
  313. ssize_t len = line->length - (DIRECTIVE_OFFSET(line, directive) + 1);
  314. uint32_t value;
  315. if (read_integer(&value, arg, arg + len))
  316. return (*result = value), true;
  317. return false;
  318. }
  319. /*
  320. Preprocess the LineBuffer into ASMLines. Change some state along the way.
  321. This function processes include directives, so read_source_file() may be
  322. called multiple times (along with the implications that has), and
  323. state->includes may be modified.
  324. On success, NULL is returned. On error, an ErrorInfo object is returned.
  325. state->lines and state->includes may still be modified.
  326. */
  327. ErrorInfo* preprocess(AssemblerState *state, const LineBuffer *source)
  328. {
  329. // TODO: if giving rom size, check header offset is in rom size range
  330. // TODO: if giving reported and actual rom size, check reported is <= actual
  331. #define CATCH_DUPES(line, first, oldval, newval) \
  332. if (first && oldval != newval) { \
  333. ei = error_info_create(line, ET_PREPROC, ED_PP_DUPLICATE); \
  334. error_info_append(ei, first); \
  335. asm_lines_free(condemned); \
  336. return ei; \
  337. } \
  338. oldval = newval; \
  339. first = line;
  340. #define REQUIRE_ARG(line, d) \
  341. if (!DIRECTIVE_HAS_ARG(line, d)) { \
  342. asm_lines_free(condemned); \
  343. return error_info_create(line, ET_PREPROC, ED_PP_NO_ARG); \
  344. }
  345. #define VALIDATE(retval) \
  346. if (!(retval)) { \
  347. asm_lines_free(condemned); \
  348. return error_info_create(line, ET_PREPROC, ED_PP_BAD_ARG); \
  349. }
  350. #define RANGE_CHECK(arg, bound) \
  351. if (arg > bound) { \
  352. asm_lines_free(condemned); \
  353. return error_info_create(line, ET_PREPROC, ED_PP_ARG_RANGE); \
  354. }
  355. DEBUG("Running preprocessor:")
  356. ErrorInfo* ei;
  357. if ((ei = build_asm_lines(source, source, &state->lines, NULL,
  358. &state->includes)))
  359. return ei;
  360. ASMLine dummy = {.next = state->lines};
  361. ASMLine *prev, *line = &dummy, *next = state->lines, *condemned = NULL;
  362. const ASMLine *first_optimizer = NULL, *first_checksum = NULL,
  363. *first_product = NULL;
  364. while ((prev = line, line = next)) {
  365. next = line->next;
  366. if (line->data[0] == DIRECTIVE_MARKER) {
  367. if (IS_DIRECTIVE(line, DIR_ORIGIN))
  368. continue; // Origins are handled by tokenizer
  369. DEBUG("- handling directive: %.*s", (int) line->length, line->data)
  370. if (IS_DIRECTIVE(line, DIR_OPTIMIZER)) {
  371. REQUIRE_ARG(line, DIR_OPTIMIZER)
  372. bool arg;
  373. VALIDATE(read_bool_argument(&arg, line, DIR_OPTIMIZER, false))
  374. CATCH_DUPES(line, first_optimizer, state->optimizer, arg)
  375. }
  376. else if (IS_DIRECTIVE(line, DIR_ROM_SIZE)) {
  377. // TODO
  378. // state->rom_size <-- value check
  379. }
  380. else if (IS_DIRECTIVE(line, DIR_ROM_HEADER)) {
  381. // TODO
  382. // state->header.offset <-- check in list of acceptable values
  383. }
  384. else if (IS_DIRECTIVE(line, DIR_ROM_CHECKSUM)) {
  385. REQUIRE_ARG(line, DIR_ROM_CHECKSUM)
  386. bool arg;
  387. VALIDATE(read_bool_argument(&arg, line, DIR_ROM_CHECKSUM, true))
  388. CATCH_DUPES(line, first_checksum, state->header.checksum, arg)
  389. }
  390. else if (IS_DIRECTIVE(line, DIR_ROM_PRODUCT)) {
  391. REQUIRE_ARG(line, DIR_ROM_PRODUCT)
  392. uint32_t arg;
  393. VALIDATE(read_uint32_argument(&arg, line, DIR_ROM_PRODUCT))
  394. RANGE_CHECK(arg, 160000)
  395. CATCH_DUPES(line, first_product, state->header.product_code, arg)
  396. }
  397. else if (IS_DIRECTIVE(line, DIR_ROM_VERSION)) {
  398. // TODO
  399. // state->header.version <-- range check
  400. }
  401. else if (IS_DIRECTIVE(line, DIR_ROM_REGION)) {
  402. // TODO
  403. // state->header.region <-- string conversion, check
  404. }
  405. else if (IS_DIRECTIVE(line, DIR_ROM_DECLSIZE)) {
  406. // TODO
  407. // state->header.rom_size <-- value/range check
  408. }
  409. else {
  410. asm_lines_free(condemned);
  411. return error_info_create(line, ET_PREPROC, ED_PP_UNKNOWN);
  412. }
  413. // Remove directive from lines, and schedule it for deletion:
  414. line->next = condemned;
  415. condemned = line;
  416. prev->next = next;
  417. line = prev;
  418. }
  419. }
  420. state->rom_size = 8; // TODO
  421. asm_lines_free(condemned);
  422. state->lines = dummy.next; // Fix list head if first line was a directive
  423. #ifdef DEBUG_MODE
  424. DEBUG("Dumping ASMLines:")
  425. const ASMLine *temp = state->lines;
  426. while (temp) {
  427. DEBUG("- %-40.*s [%s:%02zu]", (int) temp->length, temp->data,
  428. temp->filename, temp->original->lineno)
  429. temp = temp->next;
  430. }
  431. #endif
  432. return NULL;
  433. #undef VALIDATE
  434. #undef REQUIRE_ARG
  435. #undef CATCH_DUPES
  436. }