An emulator, assembler, and disassembler for the Sega Game Gear
You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
 
 
 
 
 

437 lines
14 KiB

  1. /* Copyright (C) 2014-2015 Ben Kurtovic <ben.kurtovic@gmail.com>
  2. Released under the terms of the MIT License. See LICENSE for details. */
  3. #include <libgen.h>
  4. #include <limits.h>
  5. #include <stdbool.h>
  6. #include <stdint.h>
  7. #include <stdio.h>
  8. #include <stdlib.h>
  9. #include <string.h>
  10. #include "preprocessor.h"
  11. #include "directives.h"
  12. #include "errors.h"
  13. #include "io.h"
  14. #include "parse_util.h"
  15. #include "../logging.h"
  16. #include "../util.h"
  /* Size of the stack buffer used to hold an unquoted region string. */
  #define MAX_REGION_SIZE 32

  /*
      Helper defines for preprocess().

      Every macro below expands to one or more complete statements, so call
      sites write them WITHOUT a trailing semicolon and must only use them
      inside a braced block (never as the lone body of an unbraced if/else).

      They all rely on names from the scope they are expanded in: a variable
      `ei` that receives the new error object and a `cleanup` label that is
      jumped to on failure. FAIL_ON_COND (and everything built on it)
      additionally uses the caller's variable named `line` as the error
      location.

      All macro arguments are parenthesized in the expansions so that
      expressions containing low-precedence operators (e.g. `a | b`, `c ? x : y`)
      are evaluated as a unit.
  */

  /*
      Store newval into oldval and remember `line` as the first line that set
      it. If a previous line (`first`) already set a *different* value, fail
      with ED_PP_DUPLICATE and attach that earlier line to the error.
  */
  #define SAVE_ARG(line, first, oldval, newval)                             \
      if ((first) && (oldval) != (newval)) {                                \
          ei = error_info_create((line), ET_PREPROC, ED_PP_DUPLICATE);      \
          error_info_append(ei, (first));                                   \
          goto cleanup;                                                     \
      }                                                                     \
      (oldval) = (newval);                                                  \
      (first) = (line);

  /* Fail with the given error description if cond is true. */
  #define FAIL_ON_COND(cond, err_desc)                                      \
      if ((cond)) {                                                         \
          ei = error_info_create(line, ET_PREPROC, (err_desc));             \
          goto cleanup;                                                     \
      }

  /* Fail with ED_PP_NO_ARG unless the directive d on `line` has an argument. */
  #define REQUIRE_ARG(line, d)                                              \
      FAIL_ON_COND(!DIRECTIVE_HAS_ARG(line, d), ED_PP_NO_ARG)

  /* Fail with ED_PP_BAD_ARG if retval is false/zero/NULL. */
  #define VALIDATE(retval)                                                  \
      FAIL_ON_COND(!(retval), ED_PP_BAD_ARG)

  /* Fail with ED_PP_ARG_RANGE if arg is strictly greater than bound. */
  #define RANGE_CHECK(arg, bound)                                           \
      FAIL_ON_COND((arg) > (bound), ED_PP_ARG_RANGE)
  38. /*
  39. Preprocess a single source line (source, length) into a normalized ASMLine.
  40. *Only* the data and length fields in the ASMLine object are populated. The
  41. normalization process converts tabs to spaces, lowercases all alphabetical
  42. characters, and removes runs of multiple spaces (outside of string
  43. literals), strips comments, and other things.
  44. Return NULL if an ASM line was not generated from the source, i.e. if it is
  45. blank after being stripped.
  46. */
  47. static ASMLine* normalize_line(const char *source, size_t length)
  48. {
  49. char *data = malloc(sizeof(char) * length);
  50. if (!data)
  51. OUT_OF_MEMORY()
  52. size_t si, di, slashes = 0;
  53. bool has_content = false, space_pending = false, in_string = false;
  54. for (si = di = 0; si < length; si++) {
  55. char c = source[si];
  56. if (c == '\\')
  57. slashes++;
  58. else
  59. slashes = 0;
  60. if (in_string) {
  61. if (c == '"' && (slashes % 2) == 0)
  62. in_string = false;
  63. data[di++] = c;
  64. } else {
  65. if (c == ';')
  66. break;
  67. if (c == '"' && (slashes % 2) == 0)
  68. in_string = true;
  69. if (c >= 'A' && c <= 'Z')
  70. c += 'a' - 'A';
  71. if (c == ' ' || c == '\t')
  72. space_pending = true;
  73. else {
  74. if (space_pending) {
  75. if (has_content)
  76. data[di++] = ' ';
  77. space_pending = false;
  78. }
  79. has_content = true;
  80. data[di++] = c;
  81. }
  82. }
  83. }
  84. if (!has_content) {
  85. free(data);
  86. return NULL;
  87. }
  88. ASMLine *line = malloc(sizeof(ASMLine));
  89. if (!line)
  90. OUT_OF_MEMORY()
  91. data = realloc(data, sizeof(char) * di);
  92. if (!data)
  93. OUT_OF_MEMORY()
  94. line->data = data;
  95. line->length = di;
  96. return line;
  97. }
  98. /*
  99. Read and return the target path from an include directive.
  100. This function allocates a buffer to store the filename; it must be free()'d
  101. after calling read_source_file(). If a syntax error occurs while trying to
  102. read the path, it returns NULL.
  103. */
  104. static char* read_include_path(const ASMLine *line)
  105. {
  106. size_t maxlen = strlen(line->filename) + line->length, i, start, slashes;
  107. if (maxlen >= INT_MAX) // Allows us to safely downcast to int later
  108. return NULL;
  109. char *path = malloc(sizeof(char) * maxlen);
  110. if (!path)
  111. OUT_OF_MEMORY()
  112. if (!(i = DIRECTIVE_OFFSET(line, DIR_INCLUDE)))
  113. goto error;
  114. if (line->length - i <= 4) // Not long enough to hold a non-zero argument
  115. goto error;
  116. if (line->data[i++] != ' ' || line->data[i++] != '"')
  117. goto error;
  118. // TODO: parse escaped characters properly
  119. for (start = i, slashes = 0; i < line->length; i++) {
  120. if (line->data[i] == '"' && (slashes % 2) == 0)
  121. break;
  122. if (line->data[i] == '\\')
  123. slashes++;
  124. else
  125. slashes = 0;
  126. }
  127. if (i != line->length - 1) // Junk present after closing quote
  128. goto error;
  129. char *dup = strdup(line->filename);
  130. if (!dup)
  131. OUT_OF_MEMORY()
  132. // TODO: should normalize filenames in some way to prevent accidental dupes
  133. snprintf(path, maxlen, "%s/%.*s", dirname(dup), (int) (i - start),
  134. line->data + start);
  135. free(dup);
  136. return path;
  137. error:
  138. free(path);
  139. return NULL;
  140. }
  141. /*
  142. Return whether the given path has already been loaded.
  143. */
  144. static bool path_has_been_loaded(
  145. const char *path, const LineBuffer *root, const ASMInclude *include)
  146. {
  147. if (!strcmp(path, root->filename))
  148. return true;
  149. while (include) {
  150. if (!strcmp(path, include->lines->filename))
  151. return true;
  152. include = include->next;
  153. }
  154. return false;
  155. }
  156. /*
  157. Build a LineBuffer into a ASMLines, normalizing them along the way.
  158. This function operates recursively to handle includes, but handles no other
  159. preprocessor directives.
  160. On success, NULL is returned; *head points to the head of the new ASMLine
  161. list, and *tail to its tail (assuming it is non-NULL). On error, an
  162. ErrorInfo object is returned, and *head and *tail are not modified.
  163. *includes may be updated in either case.
  164. */
  165. static ErrorInfo* build_asm_lines(
  166. const LineBuffer *root, const LineBuffer *source, ASMLine **head,
  167. ASMLine **tail, ASMInclude **includes)
  168. {
  169. ASMLine dummy = {.next = NULL};
  170. ASMLine *line, *prev = &dummy;
  171. const Line *orig, *next_orig = source->lines;
  172. while ((orig = next_orig)) {
  173. line = normalize_line(orig->data, orig->length);
  174. next_orig = orig->next;
  175. if (!line)
  176. continue;
  177. // Populate ASMLine fields not set by normalize_line():
  178. line->original = orig;
  179. line->filename = source->filename;
  180. line->next = NULL;
  181. if (IS_DIRECTIVE(line, DIR_INCLUDE)) {
  182. ErrorInfo *ei;
  183. char *path = read_include_path(line);
  184. if (!path) {
  185. ei = error_info_create(line, ET_INCLUDE, ED_INC_BAD_ARG);
  186. asm_lines_free(line);
  187. asm_lines_free(dummy.next);
  188. return ei;
  189. }
  190. if (path_has_been_loaded(path, root, *includes)) {
  191. ei = error_info_create(line, ET_INCLUDE, ED_INC_RECURSION);
  192. asm_lines_free(line);
  193. asm_lines_free(dummy.next);
  194. free(path);
  195. return ei;
  196. }
  197. DEBUG("- reading included file: %s", path)
  198. LineBuffer *incbuffer = read_source_file(path, false);
  199. free(path);
  200. if (!incbuffer) {
  201. ei = error_info_create(line, ET_INCLUDE, ED_INC_FILE_READ);
  202. asm_lines_free(line);
  203. asm_lines_free(dummy.next);
  204. return ei;
  205. }
  206. ASMInclude *include = malloc(sizeof(ASMInclude));
  207. if (!include)
  208. OUT_OF_MEMORY()
  209. include->lines = incbuffer;
  210. include->next = *includes;
  211. *includes = include;
  212. ASMLine *inchead, *inctail;
  213. if ((ei = build_asm_lines(root, incbuffer, &inchead, &inctail,
  214. includes))) {
  215. error_info_append(ei, line);
  216. asm_lines_free(line);
  217. asm_lines_free(dummy.next);
  218. return ei;
  219. }
  220. prev->next = inchead;
  221. prev = inctail;
  222. asm_lines_free(line); // Destroy only the .include line
  223. }
  224. else {
  225. prev->next = line;
  226. prev = line;
  227. }
  228. }
  229. *head = dummy.next;
  230. if (tail)
  231. *tail = prev;
  232. return NULL;
  233. }
  234. /*
  235. Parse the region code string in an ASMLine and store it in *result.
  236. Return true on success and false on failure; in the latter case, *result is
  237. not modified.
  238. */
  239. static bool parse_region_string(uint8_t *result, const ASMLine *line)
  240. {
  241. char buffer[MAX_REGION_SIZE];
  242. size_t offset = DIRECTIVE_OFFSET(line, DIR_ROM_REGION) + 1;
  243. const char *arg = line->data + offset;
  244. ssize_t len = line->length - offset;
  245. if (len <= 2 || len >= MAX_REGION_SIZE + 2) // Account for double quotes
  246. return false;
  247. if (arg[0] != '"' || arg[len - 1] != '"')
  248. return false;
  249. strncpy(buffer, arg + 1, len - 2);
  250. buffer[len - 2] = '\0';
  251. uint8_t code = region_string_to_code(buffer);
  252. if (code)
  253. return (*result = code), true;
  254. return false;
  255. }
  /*
      Tell whether `offset` is one of the three accepted header locations:
      0x1FF0, 0x3FF0, or 0x7FF0.
  */
  static inline bool is_header_offset_valid(uint16_t offset)
  {
      switch (offset) {
          case 0x1FF0:
          case 0x3FF0:
          case 0x7FF0:
              return true;
          default:
              return false;
      }
  }
  263. /*
  264. Preprocess the LineBuffer into ASMLines. Change some state along the way.
  265. This function processes include directives, so read_source_file() may be
  266. called multiple times (along with the implications that has), and
  267. state->includes may be modified.
  268. On success, NULL is returned. On error, an ErrorInfo object is returned.
  269. state->lines and state->includes may still be modified.
  270. */
  271. ErrorInfo* preprocess(AssemblerState *state, const LineBuffer *source)
  272. {
  273. ErrorInfo* ei = NULL;
  274. DEBUG("Running preprocessor:")
  275. if ((ei = build_asm_lines(source, source, &state->lines, NULL,
  276. &state->includes)))
  277. return ei;
  278. ASMLine dummy = {.next = state->lines};
  279. ASMLine *prev, *line = &dummy, *next = state->lines, *condemned = NULL;
  280. const ASMLine *first_optimizer = NULL, *first_offset = NULL,
  281. *first_checksum = NULL, *first_product = NULL,
  282. *first_version = NULL, *first_region = NULL;
  283. while ((prev = line, line = next)) {
  284. next = line->next;
  285. if (line->data[0] == DIRECTIVE_MARKER) {
  286. if (IS_DIRECTIVE(line, DIR_ORIGIN))
  287. continue; // Origins are handled by tokenizer
  288. DEBUG("- handling directive: %.*s", (int) line->length, line->data)
  289. if (IS_DIRECTIVE(line, DIR_OPTIMIZER)) {
  290. REQUIRE_ARG(line, DIR_OPTIMIZER)
  291. bool arg;
  292. VALIDATE(parse_bool(&arg, line, DIR_OPTIMIZER, false))
  293. SAVE_ARG(line, first_optimizer, state->optimizer, arg)
  294. }
  295. else if (IS_DIRECTIVE(line, DIR_ROM_SIZE)) {
  296. // TODO
  297. // state->rom_size <-- value check
  298. // auto
  299. }
  300. else if (IS_DIRECTIVE(line, DIR_ROM_HEADER)) {
  301. REQUIRE_ARG(line, DIR_ROM_HEADER)
  302. uint16_t arg;
  303. VALIDATE(parse_uint16(&arg, line, DIR_ROM_HEADER)) // auto
  304. VALIDATE(is_header_offset_valid(arg))
  305. SAVE_ARG(line, first_offset, state->header.offset, arg)
  306. }
  307. else if (IS_DIRECTIVE(line, DIR_ROM_CHECKSUM)) {
  308. REQUIRE_ARG(line, DIR_ROM_CHECKSUM)
  309. bool arg;
  310. VALIDATE(parse_bool(&arg, line, DIR_ROM_CHECKSUM, true))
  311. SAVE_ARG(line, first_checksum, state->header.checksum, arg)
  312. }
  313. else if (IS_DIRECTIVE(line, DIR_ROM_PRODUCT)) {
  314. REQUIRE_ARG(line, DIR_ROM_PRODUCT)
  315. uint32_t arg;
  316. VALIDATE(parse_uint32(&arg, line, DIR_ROM_PRODUCT)) // auto
  317. RANGE_CHECK(arg, 160000)
  318. SAVE_ARG(line, first_product, state->header.product_code, arg)
  319. }
  320. else if (IS_DIRECTIVE(line, DIR_ROM_VERSION)) {
  321. REQUIRE_ARG(line, DIR_ROM_VERSION)
  322. uint8_t arg;
  323. VALIDATE(parse_uint8(&arg, line, DIR_ROM_VERSION)) // auto
  324. RANGE_CHECK(arg, 0x10)
  325. SAVE_ARG(line, first_version, state->header.version, arg)
  326. }
  327. else if (IS_DIRECTIVE(line, DIR_ROM_REGION)) {
  328. REQUIRE_ARG(line, DIR_ROM_REGION)
  329. uint8_t arg;
  330. if (parse_uint8(&arg, line, DIR_ROM_REGION)) { // auto
  331. RANGE_CHECK(arg, 0x10)
  332. VALIDATE(region_code_to_string(arg))
  333. } else {
  334. VALIDATE(parse_region_string(&arg, line))
  335. }
  336. SAVE_ARG(line, first_region, state->header.region, arg)
  337. }
  338. else if (IS_DIRECTIVE(line, DIR_ROM_DECLSIZE)) {
  339. // TODO
  340. // state->header.rom_size <-- value/range check
  341. // auto
  342. }
  343. else {
  344. ei = error_info_create(line, ET_PREPROC, ED_PP_UNKNOWN);
  345. goto cleanup;
  346. }
  347. // Remove directive from lines, and schedule it for deletion:
  348. line->next = condemned;
  349. condemned = line;
  350. prev->next = next;
  351. line = prev;
  352. }
  353. }
  354. // TODO: if giving rom size, check header offset is in rom size range
  355. // TODO: if giving reported and actual rom size, check reported is <= actual
  356. state->rom_size = 8; // TODO
  357. #ifdef DEBUG_MODE
  358. DEBUG("Dumping ASMLines:")
  359. const ASMLine *temp = state->lines;
  360. while (temp) {
  361. DEBUG("- %-40.*s [%s:%02zu]", (int) temp->length, temp->data,
  362. temp->filename, temp->original->lineno)
  363. temp = temp->next;
  364. }
  365. #endif
  366. cleanup:
  367. asm_lines_free(condemned);
  368. state->lines = dummy.next; // Fix list head if first line was a directive
  369. return ei;
  370. }