An emulator, assembler, and disassembler for the Sega Game Gear
You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.
 
 
 
 
 

573 lines
14 KiB

  1. /* Copyright (C) 2014-2015 Ben Kurtovic <ben.kurtovic@gmail.com>
  2. Released under the terms of the MIT License. See LICENSE for details. */
  3. #include <errno.h>
  4. #include <stdio.h>
  5. #include <string.h>
  6. #include <sys/stat.h>
  7. #include "assembler.h"
  8. #include "logging.h"
  9. #include "util.h"
  10. #define DEFAULT_HEADER_OFFSET 0x7FF0
  11. #define DEFAULT_REGION "GG Export"
  12. #define SYMBOL_TABLE_BUCKETS 128
  13. /* Internal structs */
  14. struct ASMLine {
  15. char *data;
  16. size_t length;
  17. const Line *original;
  18. const char *filename;
  19. struct ASMLine *next;
  20. };
  21. typedef struct ASMLine ASMLine;
  22. struct ASMInclude {
  23. LineBuffer *lines;
  24. struct ASMInclude *next;
  25. };
  26. typedef struct ASMInclude ASMInclude;
  27. struct ASMInstruction {
  28. size_t offset;
  29. uint8_t length;
  30. uint8_t b1, b2, b3, b4;
  31. uint8_t virtual_byte;
  32. char *symbol;
  33. struct ASMInstruction *next;
  34. };
  35. typedef struct ASMInstruction ASMInstruction;
  36. struct ASMSymbol {
  37. size_t offset;
  38. char *symbol;
  39. struct ASMSymbol *next;
  40. };
  41. typedef struct ASMSymbol ASMSymbol;
  42. typedef struct {
  43. ASMSymbol *buckets[SYMBOL_TABLE_BUCKETS];
  44. } ASMSymbolTable;
  45. typedef struct {
  46. size_t offset;
  47. bool checksum;
  48. uint32_t product_code;
  49. uint8_t version;
  50. uint8_t region;
  51. uint8_t rom_size;
  52. } ASMHeaderInfo;
  53. typedef struct {
  54. ASMHeaderInfo header;
  55. bool optimizer;
  56. size_t rom_size;
  57. ASMLine *lines;
  58. ASMInclude *includes;
  59. ASMInstruction *instructions;
  60. ASMSymbolTable *symtable;
  61. } AssemblerState;
  62. /*
  63. Deallocate a LineBuffer previously created with read_source_file().
  64. */
  65. static void free_line_buffer(LineBuffer *buffer)
  66. {
  67. Line *line = buffer->lines, *temp;
  68. while (line) {
  69. temp = line->next;
  70. free(line->data);
  71. free(line);
  72. line = temp;
  73. }
  74. free(buffer->filename);
  75. free(buffer);
  76. }
  77. /*
  78. Read the contents of the source file at the given path into a line buffer.
  79. Return the buffer if reading was successful; it must be freed with
  80. free_line_buffer() when done. Return NULL if an error occurred while
  81. reading. A message will be printed to stderr in this case.
  82. */
  83. static LineBuffer* read_source_file(const char *path)
  84. {
  85. FILE *fp;
  86. struct stat st;
  87. if (!(fp = fopen(path, "r"))) {
  88. ERROR_ERRNO("couldn't open source file")
  89. return NULL;
  90. }
  91. if (fstat(fileno(fp), &st)) {
  92. fclose(fp);
  93. ERROR_ERRNO("couldn't open source file")
  94. return NULL;
  95. }
  96. if (!(st.st_mode & S_IFREG)) {
  97. fclose(fp);
  98. ERROR("couldn't open source file: %s", st.st_mode & S_IFDIR ?
  99. "Is a directory" : "Is not a regular file")
  100. return NULL;
  101. }
  102. LineBuffer *source = malloc(sizeof(LineBuffer));
  103. if (!source)
  104. OUT_OF_MEMORY()
  105. source->lines = NULL;
  106. source->filename = malloc(sizeof(char) * (strlen(path) + 1));
  107. if (!source->filename)
  108. OUT_OF_MEMORY()
  109. strcpy(source->filename, path);
  110. Line dummy = {.next = NULL};
  111. Line *line, *prev = &dummy;
  112. size_t lineno = 1;
  113. while (1) {
  114. char *data = NULL;
  115. size_t cap = 0;
  116. ssize_t len;
  117. if ((len = getline(&data, &cap, fp)) < 0) {
  118. if (feof(fp))
  119. break;
  120. if (errno == ENOMEM)
  121. OUT_OF_MEMORY()
  122. ERROR_ERRNO("couldn't read source file")
  123. fclose(fp);
  124. source->lines = dummy.next;
  125. free_line_buffer(source);
  126. return NULL;
  127. }
  128. line = malloc(sizeof(Line));
  129. if (!line)
  130. OUT_OF_MEMORY()
  131. line->data = data;
  132. line->length = feof(fp) ? len : (len - 1);
  133. line->lineno = lineno++;
  134. line->next = NULL;
  135. prev->next = line;
  136. prev = line;
  137. }
  138. fclose(fp);
  139. source->lines = dummy.next;
  140. return source;
  141. }
  142. /*
  143. Write an assembled binary file to the given path.
  144. Return whether the file was written successfully. On error, a message is
  145. printed to stderr.
  146. */
  147. static bool write_binary_file(const char *path, const uint8_t *data, size_t size)
  148. {
  149. FILE *fp;
  150. if (!(fp = fopen(path, "wb"))) {
  151. ERROR_ERRNO("couldn't open destination file")
  152. return false;
  153. }
  154. if (!fwrite(data, size, 1, fp)) {
  155. fclose(fp);
  156. ERROR_ERRNO("couldn't write to destination file")
  157. return false;
  158. }
  159. fclose(fp);
  160. return true;
  161. }
  162. /*
  163. Print an ErrorInfo object returned by assemble() to the given stream.
  164. */
  165. void error_info_print(const ErrorInfo *error_info, FILE *file)
  166. {
  167. // TODO
  168. fprintf(file, "Error: Unknown error\n");
  169. }
  170. /*
  171. Destroy an ErrorInfo object created by assemble().
  172. */
  173. void error_info_destroy(ErrorInfo *error_info)
  174. {
  175. if (!error_info)
  176. return;
  177. // TODO
  178. free(error_info);
  179. }
  180. /*
  181. Initialize default values in an AssemblerState object.
  182. */
  183. static void init_state(AssemblerState *state)
  184. {
  185. state->header.offset = DEFAULT_HEADER_OFFSET;
  186. state->header.checksum = true;
  187. state->header.product_code = 0;
  188. state->header.version = 0;
  189. state->header.region = region_string_to_code(DEFAULT_REGION);
  190. state->header.rom_size = 0;
  191. state->optimizer = false;
  192. state->rom_size = 0;
  193. state->lines = NULL;
  194. state->includes = NULL;
  195. state->instructions = NULL;
  196. state->symtable = NULL;
  197. }
  198. /*
  199. Deallocate an ASMLine list.
  200. */
  201. static void free_asm_lines(ASMLine *line)
  202. {
  203. while (line) {
  204. ASMLine *temp = line->next;
  205. free(line->data);
  206. free(line);
  207. line = temp;
  208. }
  209. }
  210. /*
  211. Deallocate an ASMInclude list.
  212. */
  213. static void free_asm_includes(ASMInclude *include)
  214. {
  215. while (include) {
  216. ASMInclude *temp = include->next;
  217. free_line_buffer(include->lines);
  218. free(include);
  219. include = temp;
  220. }
  221. }
  222. /*
  223. Deallocate an ASMInstruction list.
  224. */
  225. static void free_asm_instructions(ASMInstruction *inst)
  226. {
  227. while (inst) {
  228. ASMInstruction *temp = inst->next;
  229. if (inst->symbol)
  230. free(inst->symbol);
  231. free(inst);
  232. inst = temp;
  233. }
  234. }
  235. /*
  236. Deallocate an ASMSymbolTable.
  237. */
  238. static void free_asm_symtable(ASMSymbolTable *symtable)
  239. {
  240. if (!symtable)
  241. return;
  242. for (size_t bucket = 0; bucket < SYMBOL_TABLE_BUCKETS; bucket++) {
  243. ASMSymbol *sym = symtable->buckets[bucket], *temp;
  244. while (sym) {
  245. temp = sym->next;
  246. free(sym->symbol);
  247. free(sym);
  248. sym = temp;
  249. }
  250. }
  251. free(symtable);
  252. }
  253. /*
  254. Preprocess a single source line (source, length) into a normalized ASMLine.
  255. *Only* the data and length fields in the ASMLine object are populated. The
  256. normalization process converts tabs to spaces, removes runs of multiple
  257. spaces (outside of string literals), strips comments, and other things.
  258. Return NULL if an ASM line was not generated from the source, i.e. if it is
  259. blank after being stripped.
  260. */
  261. static ASMLine* normalize_line(const char *source, size_t length)
  262. {
  263. char *data = malloc(sizeof(char) * length);
  264. if (!data)
  265. OUT_OF_MEMORY()
  266. size_t si, di, slashes = 0;
  267. bool has_content = false, space_pending = false, in_string = false;
  268. for (si = di = 0; si < length; si++) {
  269. char c = source[si];
  270. if (c == '\\')
  271. slashes++;
  272. else
  273. slashes = 0;
  274. if (in_string) {
  275. if (c == '"' && (slashes % 2) == 0)
  276. in_string = false;
  277. data[di++] = c;
  278. } else {
  279. if (c == ';')
  280. break;
  281. if (c == '"' && (slashes % 2) == 0)
  282. in_string = true;
  283. if (c == '\t' || c == ' ')
  284. space_pending = true;
  285. else {
  286. if (space_pending) {
  287. if (has_content)
  288. data[di++] = ' ';
  289. space_pending = false;
  290. }
  291. has_content = true;
  292. data[di++] = c;
  293. }
  294. }
  295. }
  296. if (!has_content) {
  297. free(data);
  298. return NULL;
  299. }
  300. ASMLine *line = malloc(sizeof(ASMLine));
  301. if (!line)
  302. OUT_OF_MEMORY()
  303. data = realloc(data, sizeof(char) * di);
  304. if (!data)
  305. OUT_OF_MEMORY()
  306. line->data = data;
  307. line->length = di;
  308. return line;
  309. }
  310. /*
  311. Preprocess the LineBuffer into ASMLines. Change some state along the way.
  312. This function processes include directives, so read_source_file() may be
  313. called multiple times (along with the implications that has), and
  314. state->includes may be modified.
  315. On success, state->lines is modified and NULL is returned. On error, an
  316. ErrorInfo object is returned, and state->lines and state->includes are not
  317. modified.
  318. */
  319. static ErrorInfo* preprocess(AssemblerState *state, const LineBuffer *source)
  320. {
  321. // TODO
  322. // state->header.offset <-- check in list of acceptable values
  323. // state->header.checksum <-- boolean check
  324. // state->header.product_code <-- range check
  325. // state->header.version <-- range check
  326. // state->header.region <-- string conversion, check
  327. // state->header.rom_size <-- value/range check
  328. // state->optimizer <-- boolean check
  329. // state->rom_size <-- value check
  330. // if giving rom size, check header offset is in rom size range
  331. // if giving reported and actual rom size, check reported is <= actual
  332. // ensure no duplicate explicit assignments
  333. ASMLine dummy = {.next = NULL};
  334. ASMLine *line, *prev = &dummy;
  335. const Line *orig = source->lines;
  336. while (orig) {
  337. if ((line = normalize_line(orig->data, orig->length))) {
  338. line->original = orig;
  339. line->filename = source->filename;
  340. line->next = NULL;
  341. prev->next = line;
  342. prev = line;
  343. }
  344. orig = orig->next;
  345. }
  346. state->lines = dummy.next;
  347. #ifdef DEBUG_MODE
  348. DEBUG("Dumping ASMLines:")
  349. const ASMLine *temp = state->lines;
  350. while (temp) {
  351. DEBUG("- %-40.*s [%s:%02zu]", (int) temp->length, temp->data,
  352. temp->filename, temp->original->lineno)
  353. temp = temp->next;
  354. }
  355. #endif
  356. return NULL;
  357. }
  358. /*
  359. Tokenize ASMLines into ASMInstructions.
  360. On success, state->instructions is modified and NULL is returned. On error,
  361. an ErrorInfo object is returned and state->instructions is not modified.
  362. state->symtable may or may not be modified regardless of success.
  363. */
  364. static ErrorInfo* tokenize(AssemblerState *state)
  365. {
  366. // TODO
  367. // verify no instructions clash with header offset
  368. // if rom size is set, verify nothing overflows
  369. return NULL;
  370. }
  371. /*
  372. Resolve default placeholder values in assembler state, such as ROM size.
  373. On success, no new heap objects are allocated. On error, an ErrorInfo
  374. object is returned.
  375. */
  376. static ErrorInfo* resolve_defaults(AssemblerState *state)
  377. {
  378. // TODO
  379. // if (!state.rom_size)
  380. // set to max possible >= 32 KB, or error if too many instructions
  381. // if (state.header.rom_size)
  382. // check reported rom size is <= actual rom size
  383. // if (!state.header.rom_size)
  384. // set to actual rom size
  385. return NULL;
  386. }
  387. /*
  388. Resolve symbol placeholders in instructions such as jumps and branches.
  389. On success, no new heap objects are allocated. On error, an ErrorInfo
  390. object is returned.
  391. */
  392. static ErrorInfo* resolve_symbols(AssemblerState *state)
  393. {
  394. // TODO
  395. return NULL;
  396. }
  397. /*
  398. Convert finalized ASMInstructions into a binary data block.
  399. This function should never fail.
  400. */
  401. static void serialize_binary(AssemblerState *state, uint8_t *binary)
  402. {
  403. // TODO
  404. for (size_t i = 0; i < state->rom_size; i++)
  405. binary[i] = 'X';
  406. }
  407. /*
  408. Assemble the z80 source code in the source code buffer into binary data.
  409. If successful, return the size of the assembled binary data and change
  410. *binary_ptr to point to the assembled ROM data buffer. *binary_ptr must be
  411. free()'d when finished.
  412. If an error occurred, return 0 and update *ei_ptr to point to an ErrorInfo
  413. object which can be shown to the user with error_info_print(). The
  414. ErrorInfo object must be destroyed with error_info_destroy() when finished.
  415. In either case, only one of *binary_ptr and *ei_ptr is modified.
  416. */
  417. size_t assemble(const LineBuffer *source, uint8_t **binary_ptr, ErrorInfo **ei_ptr)
  418. {
  419. AssemblerState state;
  420. ErrorInfo *error_info;
  421. size_t retval = 0;
  422. init_state(&state);
  423. if ((error_info = preprocess(&state, source)))
  424. goto error;
  425. if (!(state.symtable = malloc(sizeof(ASMSymbolTable))))
  426. OUT_OF_MEMORY()
  427. for (size_t bucket = 0; bucket < SYMBOL_TABLE_BUCKETS; bucket++)
  428. state.symtable->buckets[bucket] = NULL;
  429. if ((error_info = tokenize(&state)))
  430. goto error;
  431. if ((error_info = resolve_defaults(&state)))
  432. goto error;
  433. if ((error_info = resolve_symbols(&state)))
  434. goto error;
  435. uint8_t *binary = malloc(sizeof(uint8_t) * state.rom_size);
  436. if (!binary)
  437. OUT_OF_MEMORY()
  438. serialize_binary(&state, binary);
  439. *binary_ptr = binary;
  440. retval = state.rom_size;
  441. goto cleanup;
  442. error:
  443. *ei_ptr = error_info;
  444. cleanup:
  445. free_asm_lines(state.lines);
  446. free_asm_includes(state.includes);
  447. free_asm_instructions(state.instructions);
  448. free_asm_symtable(state.symtable);
  449. return retval;
  450. }
  451. /*
  452. Assemble the z80 source code at the input path into a binary file.
  453. Return true if the operation was a success and false if it was a failure.
  454. Errors are printed to STDOUT; if the operation was successful then nothing
  455. is printed.
  456. */
  457. bool assemble_file(const char *src_path, const char *dst_path)
  458. {
  459. LineBuffer *source = read_source_file(src_path);
  460. if (!source)
  461. return false;
  462. uint8_t *binary;
  463. ErrorInfo *error_info;
  464. size_t size = assemble(source, &binary, &error_info);
  465. free_line_buffer(source);
  466. if (!size) {
  467. error_info_print(error_info, stderr);
  468. error_info_destroy(error_info);
  469. return false;
  470. }
  471. bool success = write_binary_file(dst_path, binary, size);
  472. free(binary);
  473. return success;
  474. }