An emulator, assembler, and disassembler for the Sega Game Gear
Du kan inte välja fler än 25 ämnen Ämnen måste starta med en bokstav eller siffra, kan innehålla bindestreck ('-') och vara max 35 tecken långa.

parse_util.c 15 KiB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556
  1. /* Copyright (C) 2014-2015 Ben Kurtovic <ben.kurtovic@gmail.com>
  2. Released under the terms of the MIT License. See LICENSE for details. */
  3. #include <limits.h>
  4. #include <stdlib.h>
  5. #include <string.h>
  6. #include "parse_util.h"
  7. #include "directives.h"
  8. #include "../logging.h"
  9. #include "../util.h"
  /* Upper bound (including the terminating NUL) on a region name's length. */
  #define MAX_REGION_SIZE 32

  /*
      Map an ASCII uppercase letter to lowercase; any other value is returned
      unchanged. The argument is fully parenthesized so that expressions such
      as a conditional may be passed safely, but it is still evaluated more
      than once, so it must not have side effects.
  */
  #define LCASE(c) (((c) >= 'A' && (c) <= 'Z') ? ((c) + 'a' - 'A') : (c))

  /*
      Emit the signature of a directive parser named dparse_<name> that writes
      a value of the given type through its result pointer.
  */
  #define DIRECTIVE_PARSE_FUNC(name, type)                                      \
      bool dparse_##name(type *result, const ASMLine *line, const char *directive)
  14. /*
  15. All public functions in this file follow the same return conventions:
  16. - Return true on success and false on failure.
  17. - *result is only modified on success.
  18. */
  /*
      Strip the enclosing parentheses (and at most one space of padding on each
      side) from an indirect argument such as (hl) or ( ix+5 ).

      On entry *size_ptr must be greater than 2. On success *arg_ptr points at
      the first remaining character and *size_ptr holds the remaining length,
      which is guaranteed to be positive. On failure neither is touched.
  */
  static bool adjust_for_indirection(const char **arg_ptr, ssize_t *size_ptr)
  {
      const char *outer = *arg_ptr;
      ssize_t outer_len = *size_ptr;

      if (outer[0] != '(' || outer[outer_len - 1] != ')')
          return false;

      const char *inner = outer + 1;
      ssize_t inner_len = outer_len - 2;

      // Optional single space right after the opening parenthesis
      if (*inner == ' ') {
          inner++;
          inner_len--;
          if (inner_len <= 0)
              return false;
      }

      // Optional single space right before the closing parenthesis
      if (inner[inner_len - 1] == ' ') {
          inner_len--;
          if (inner_len <= 0)
              return false;
      }

      *arg_ptr = inner;
      *size_ptr = inner_len;
      return true;
  }
  /*
      Read in a boolean value and store it in *result.

      The accepted spellings are exactly "0", "1", "on", "off", "true" and
      "false"; the argument must match one of them over its full length.
  */
  bool parse_bool(bool *result, const char *arg, ssize_t size)
  {
      static const struct {
          const char *text;
          bool value;
      } spellings[] = {
          {"0",     false},
          {"1",     true},
          {"on",    true},
          {"off",   false},
          {"true",  true},
          {"false", false},
      };

      for (size_t i = 0; i < sizeof spellings / sizeof spellings[0]; i++) {
          size_t len = strlen(spellings[i].text);

          if (size != (ssize_t) len)
              continue;
          if (strncmp(arg, spellings[i].text, len) == 0) {
              *result = spellings[i].value;
              return true;
          }
      }
      return false;
  }
  /*
      Read in a 32-bit unsigned integer and store it in *result.

      Two notations are understood: plain decimal digits, or a '$' followed by
      one or more hexadecimal digits (0-9 and lowercase a-f only). Anything
      else, an empty argument, or a value above UINT32_MAX is rejected.
  */
  bool parse_uint32_t(uint32_t *result, const char *arg, ssize_t size)
  {
      if (size <= 0)
          return false;

      const char *stop = arg + size;
      unsigned base = 10;

      if (*arg == '$') {
          base = 0x10;
          if (++arg == stop)  // A lone '$' carries no digits
              return false;
      }

      // Accumulate in 64 bits so that overflow past 32 bits is detectable
      uint64_t acc = 0;
      for (; arg < stop; arg++) {
          char c = *arg;
          unsigned digit;

          if (c >= '0' && c <= '9')
              digit = c - '0';
          else if (base == 0x10 && c >= 'a' && c <= 'f')
              digit = 0xA + (c - 'a');
          else
              return false;

          acc = (acc * base) + digit;
          if (acc > UINT32_MAX)
              return false;
      }

      *result = acc;
      return true;
  }
  112. /*
  113. Read in a string, possibly with escape sequences, and store it in *result.
  114. *length is also updated to the size of the string, which is *not*
  115. null-terminated. *result must be free()'d when finished.
  116. */
  117. bool parse_string(char **result, size_t *length, const char *arg, ssize_t size)
  118. {
  119. if (size < 2 || arg[0] != '"' || arg[size - 1] != '"')
  120. return false;
  121. ssize_t i, slashes = 0;
  122. for (i = 1; i < size; i++) {
  123. if (arg[i] == '"' && (slashes % 2) == 0)
  124. break;
  125. // TODO: parse escape codes here
  126. if (arg[i] == '\\')
  127. slashes++;
  128. else
  129. slashes = 0;
  130. }
  131. if (i != size - 1) // Junk present after closing quote
  132. return false;
  133. *length = size - 2;
  134. *result = malloc(sizeof(char) * (*length));
  135. if (!*result)
  136. OUT_OF_MEMORY()
  137. memcpy(*result, arg + 1, *length);
  138. return true;
  139. }
  140. /*
  141. Read in a space-separated sequence of bytes and store it in *result.
  142. *length is also updated to the number of bytes in the array. *result must
  143. be free()'d when finished.
  144. */
  145. bool parse_bytes(uint8_t **result, size_t *length, const char *arg, ssize_t size)
  146. {
  147. if (size <= 0)
  148. return false;
  149. const char *end = arg + size;
  150. uint8_t *bytes = NULL;
  151. size_t nbytes = 0;
  152. while (arg < end) {
  153. const char *start = arg;
  154. while (arg != end && *arg != ' ' && *arg != ',')
  155. arg++;
  156. uint32_t temp;
  157. if (!parse_uint32_t(&temp, start, arg - start) || temp > UINT8_MAX) {
  158. free(bytes);
  159. return false;
  160. }
  161. nbytes++;
  162. bytes = realloc(bytes, sizeof(uint8_t) * nbytes);
  163. if (!bytes)
  164. OUT_OF_MEMORY()
  165. bytes[nbytes - 1] = temp;
  166. if (arg < end - 1 && *arg == ',' && *(arg + 1) == ' ')
  167. arg += 2;
  168. else if (arg++ >= end)
  169. break;
  170. }
  171. *result = bytes;
  172. *length = nbytes;
  173. return true;
  174. }
  175. /*
  176. Read in a register argument and store it in *result.
  177. */
  178. bool argparse_register(ASMArgRegister *result, const char *arg, ssize_t size)
  179. {
  180. if (size < 1 || size > 3)
  181. return false;
  182. char buf[3] = {'\0'};
  183. switch (size) {
  184. case 3: buf[2] = LCASE(arg[2]);
  185. case 2: buf[1] = LCASE(arg[1]);
  186. case 1: buf[0] = LCASE(arg[0]);
  187. }
  188. switch (size) {
  189. case 1:
  190. switch (buf[0]) {
  191. case 'a': return (*result = REG_A), true;
  192. case 'f': return (*result = REG_F), true;
  193. case 'b': return (*result = REG_B), true;
  194. case 'c': return (*result = REG_C), true;
  195. case 'd': return (*result = REG_D), true;
  196. case 'e': return (*result = REG_E), true;
  197. case 'h': return (*result = REG_H), true;
  198. case 'l': return (*result = REG_L), true;
  199. case 'i': return (*result = REG_I), true;
  200. case 'r': return (*result = REG_R), true;
  201. }
  202. return false;
  203. case 2:
  204. switch ((buf[0] << 8) + buf[1]) {
  205. case 0x6166: return (*result = REG_AF), true;
  206. case 0x6263: return (*result = REG_BC), true;
  207. case 0x6465: return (*result = REG_DE), true;
  208. case 0x686C: return (*result = REG_HL), true;
  209. case 0x6978: return (*result = REG_IX), true;
  210. case 0x6979: return (*result = REG_IY), true;
  211. case 0x7063: return (*result = REG_PC), true;
  212. case 0x7370: return (*result = REG_SP), true;
  213. }
  214. return false;
  215. case 3:
  216. switch ((buf[0] << 16) + (buf[1] << 8) + buf[2]) {
  217. case 0x616627: return (*result = REG_AF_), true;
  218. case 0x697868: return (*result = REG_IXH), true;
  219. case 0x69786C: return (*result = REG_IXL), true;
  220. case 0x697968: return (*result = REG_IYH), true;
  221. case 0x69796C: return (*result = REG_IYL), true;
  222. }
  223. return false;
  224. }
  225. return false;
  226. }
  227. /*
  228. Read in a condition argument and store it in *result.
  229. */
  230. bool argparse_condition(ASMArgCondition *result, const char *arg, ssize_t size)
  231. {
  232. if (size < 1 || size > 2)
  233. return false;
  234. char buf[2] = {'\0'};
  235. switch (size) {
  236. case 2: buf[1] = LCASE(arg[1]);
  237. case 1: buf[0] = LCASE(arg[0]);
  238. }
  239. switch (size) {
  240. case 1:
  241. switch (buf[0]) {
  242. case 'n': return (*result = COND_N), true;
  243. case 'c': return (*result = COND_C), true;
  244. case 'p': return (*result = COND_P), true;
  245. case 'm': return (*result = COND_M), true;
  246. }
  247. return false;
  248. case 2:
  249. switch ((buf[0] << 8) + buf[1]) {
  250. case 0x6E7A: return (*result = COND_NZ), true;
  251. case 0x6E63: return (*result = COND_NC), true;
  252. case 0x706F: return (*result = COND_PO), true;
  253. case 0x7065: return (*result = COND_PE), true;
  254. }
  255. return false;
  256. }
  257. return false;
  258. }
  259. /*
  260. Read in an immediate argument and store it in *result.
  261. */
  262. bool argparse_immediate(ASMArgImmediate *result, const char *arg, ssize_t size)
  263. {
  264. bool negative = false;
  265. ssize_t i = 0;
  266. if (size <= 0)
  267. return false;
  268. while (arg[i] == '-' || arg[i] == '+' || arg[i] == ' ') {
  269. if (arg[i] == '-')
  270. negative = !negative;
  271. if (++i >= size)
  272. return false;
  273. }
  274. uint32_t uval;
  275. if (!parse_uint32_t(&uval, arg + i, size - i) || uval > UINT16_MAX)
  276. return false;
  277. int32_t sval = negative ? -uval : uval;
  278. if (sval < INT16_MIN)
  279. return false;
  280. result->uval = uval;
  281. result->sval = sval;
  282. result->mask = 0;
  283. if (sval < 0) {
  284. if (sval >= INT8_MIN)
  285. result->mask |= IMM_S8;
  286. if (sval >= INT8_MIN + 2)
  287. result->mask |= IMM_REL;
  288. } else {
  289. result->mask = IMM_U16;
  290. if (uval <= UINT8_MAX)
  291. result->mask |= IMM_U8;
  292. if (uval <= INT8_MAX)
  293. result->mask |= IMM_S8;
  294. if (uval <= INT8_MAX + 2)
  295. result->mask |= IMM_REL;
  296. if (uval <= 7)
  297. result->mask |= IMM_BIT;
  298. if (!(uval & ~0x38))
  299. result->mask |= IMM_RST;
  300. if (uval <= 2)
  301. result->mask |= IMM_IM;
  302. }
  303. return true;
  304. }
  305. /*
  306. Read in an indirect argument and store it in *result.
  307. */
  308. bool argparse_indirect(ASMArgIndirect *result, const char *arg, ssize_t size)
  309. {
  310. if (size < 3 || !adjust_for_indirection(&arg, &size))
  311. return false;
  312. ASMArgRegister reg;
  313. ASMArgImmediate imm;
  314. if (argparse_register(&reg, arg, size)) {
  315. if (reg == REG_BC || reg == REG_DE || reg == REG_HL) {
  316. result->type = AT_REGISTER;
  317. result->addr.reg = reg;
  318. return true;
  319. }
  320. } else if (argparse_immediate(&imm, arg, size)) {
  321. if (imm.mask & IMM_U16) {
  322. result->type = AT_IMMEDIATE;
  323. result->addr.imm = imm;
  324. return true;
  325. }
  326. }
  327. return false;
  328. }
  329. /*
  330. Read in an indexed argument and store it in *result.
  331. */
  332. bool argparse_indexed(ASMArgIndexed *result, const char *arg, ssize_t size)
  333. {
  334. if (size < 4 || !adjust_for_indirection(&arg, &size) || size < 2)
  335. return false;
  336. ASMArgRegister reg;
  337. if (arg[0] != 'i')
  338. return false;
  339. if (arg[1] == 'x')
  340. reg = REG_IX;
  341. else if (arg[1] == 'y')
  342. reg = REG_IY;
  343. else
  344. return false;
  345. arg += 2;
  346. size -= 2;
  347. if (size > 0 && arg[0] == ' ') {
  348. arg++;
  349. size--;
  350. }
  351. if (size > 0) {
  352. ASMArgImmediate imm;
  353. if (!argparse_immediate(&imm, arg, size) || !(imm.mask & IMM_S8))
  354. return false;
  355. result->offset = imm.sval;
  356. } else {
  357. result->offset = 0;
  358. }
  359. result->reg = reg;
  360. return true;
  361. }
  362. /*
  363. Read in a label argument and store it in *result.
  364. */
  365. bool argparse_label(ASMArgLabel *result, const char *arg, ssize_t size)
  366. {
  367. if (size >= MAX_SYMBOL_SIZE)
  368. return false;
  369. for (const char *i = arg; i < arg + size; i++) {
  370. char c = *i;
  371. if (!((c >= 'a' && c <= 'z') || (i != arg && c >= '0' && c <= '9') ||
  372. c == '_' || c == '.'))
  373. return false;
  374. }
  375. strncpy(result->text, arg, size);
  376. result->text[size] = '\0';
  377. return true;
  378. }
  379. /*
  380. Read in a boolean argument from the given line and store it in *result.
  381. */
  382. DIRECTIVE_PARSE_FUNC(bool, bool)
  383. {
  384. size_t offset = DIRECTIVE_OFFSET(line, directive) + 1;
  385. return parse_bool(result, line->data + offset, line->length - offset);
  386. }
  387. /*
  388. Read in a 32-bit int argument from the given line and store it in *result.
  389. */
  390. DIRECTIVE_PARSE_FUNC(uint32_t, uint32_t)
  391. {
  392. size_t offset = DIRECTIVE_OFFSET(line, directive) + 1;
  393. return parse_uint32_t(result, line->data + offset, line->length - offset);
  394. }
  395. /*
  396. Read in a 16-bit int argument from the given line and store it in *result.
  397. */
  398. DIRECTIVE_PARSE_FUNC(uint16_t, uint16_t)
  399. {
  400. uint32_t value;
  401. if (dparse_uint32_t(&value, line, directive) && value <= UINT16_MAX)
  402. return (*result = value), true;
  403. return false;
  404. }
  405. /*
  406. Read in an 8-bit int argument from the given line and store it in *result.
  407. */
  408. DIRECTIVE_PARSE_FUNC(uint8_t, uint8_t)
  409. {
  410. uint32_t value;
  411. if (dparse_uint32_t(&value, line, directive) && value <= UINT8_MAX)
  412. return (*result = value), true;
  413. return false;
  414. }
  415. /*
  416. Parse a ROM size string in an ASMLine and store it in *result.
  417. */
  418. DIRECTIVE_PARSE_FUNC(rom_size, uint32_t)
  419. {
  420. const char *arg = line->data + DIRECTIVE_OFFSET(line, directive) + 1;
  421. const char *end = line->data + line->length - 1;
  422. if (end - arg < 5)
  423. return false;
  424. if (*(arg++) != '"' || *(end--) != '"')
  425. return false;
  426. if (*end != 'B' && *end != 'b')
  427. return false;
  428. end--;
  429. uint32_t factor;
  430. if (*end == 'K' || *end == 'k')
  431. factor = 1 << 10;
  432. else if (*end == 'M' || *end == 'm')
  433. factor = 1 << 20;
  434. else
  435. return false;
  436. end--;
  437. if (*end != ' ')
  438. return false;
  439. uint32_t value = 0;
  440. while (arg < end) {
  441. if (*arg < '0' || *arg > '9')
  442. return false;
  443. value = (value * 10) + (*arg - '0');
  444. if (value > UINT16_MAX)
  445. return false;
  446. arg++;
  447. }
  448. *result = value * factor;
  449. return true;
  450. }
  451. /*
  452. Parse a region code string in an ASMLine and store it in *result.
  453. */
  454. DIRECTIVE_PARSE_FUNC(region_string, uint8_t)
  455. {
  456. char buffer[MAX_REGION_SIZE];
  457. size_t offset = DIRECTIVE_OFFSET(line, directive) + 1;
  458. const char *arg = line->data + offset;
  459. ssize_t len = line->length - offset;
  460. if (len <= 2 || len >= MAX_REGION_SIZE + 2) // Account for double quotes
  461. return false;
  462. if (arg[0] != '"' || arg[len - 1] != '"')
  463. return false;
  464. strncpy(buffer, arg + 1, len - 2);
  465. buffer[len - 2] = '\0';
  466. uint8_t code = region_string_to_code(buffer);
  467. if (code)
  468. return (*result = code), true;
  469. return false;
  470. }
  471. /*
  472. Parse a size code in an ASMLine and store it in *result.
  473. */
  474. DIRECTIVE_PARSE_FUNC(size_code, uint8_t)
  475. {
  476. uint32_t bytes;
  477. if (!dparse_uint32_t(&bytes, line, directive)) {
  478. if (!dparse_rom_size(&bytes, line, directive))
  479. return false;
  480. }
  481. uint8_t code = size_bytes_to_code(bytes);
  482. if (code != INVALID_SIZE_CODE)
  483. return (*result = code), true;
  484. return false;
  485. }