|
123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540 |
- /* Copyright (C) 2014-2016 Ben Kurtovic <ben.kurtovic@gmail.com>
- Released under the terms of the MIT License. See LICENSE for details. */
-
- #include <stdlib.h>
- #include <string.h>
-
- #include "tokenizer.h"
- #include "directives.h"
- #include "instructions.h"
- #include "inst_args.h"
- #include "parse_util.h"
- #include "../mmu.h"
- #include "../rom.h"
- #include "../util.h"
-
- /* Internal structs */
-
- typedef struct {
- size_t size;
- const ASMLine **overlap_table;
- const ASMLine **overlap_origins;
- const ASMLine *origin;
- uint8_t bank;
- bool cross_blocks;
- } ASMLayoutInfo;
-
- typedef struct {
- int8_t slots[MMU_NUM_ROM_BANKS];
- const ASMLine *lines[MMU_NUM_ROM_BANKS];
- } ASMSlotInfo;
-
- /* Sentinel values for overlap table */
-
- const ASMLine header_sentinel, bounds_sentinel;
-
/*
    Common signature of the data parsers selected in parse_data(): on success
    the parser stores a buffer in *result and its size in *length.
*/

typedef bool (*parser_func)(
    uint8_t **result, size_t *length, const char *arg, ssize_t size);
-
- /*
- Return the address of a given ROM offset when mapped into the given slot.
- */
- static inline uint16_t map_into_slot(size_t offset, int8_t slot)
- {
- return (slot * MMU_ROM_BANK_SIZE) + (offset & (MMU_ROM_BANK_SIZE - 1));
- }
-
/*
    Pick the slot a bank uses when none is given explicitly.

    Banks 0, 1 and 2 map to the slot with the same number; every higher bank
    falls back to slot 2.
*/
static inline int8_t default_bank_slot(uint8_t bank)
{
    if (bank > 2)
        return 2;
    return bank;
}
-
- /*
- Initialize an ASMLayoutInfo object.
- */
- static void init_layout_info(ASMLayoutInfo *li, AssemblerState *state)
- {
- li->size = state->rom_size ? state->rom_size : ROM_SIZE_MAX;
- li->origin = NULL;
- li->bank = 0;
- li->cross_blocks = state->cross_blocks;
- li->overlap_table = cr_calloc(li->size, sizeof(const ASMLine*));
- li->overlap_origins = cr_calloc(li->size, sizeof(const ASMLine*));
-
- for (size_t i = 0; i < HEADER_SIZE; i++)
- li->overlap_table[state->header.offset + i] = &header_sentinel;
- }
-
- /*
- Free the resources allocated by an ASMLayoutInfo object.
- */
- static void free_layout_info(ASMLayoutInfo *li)
- {
- free(li->overlap_table);
- free(li->overlap_origins);
- }
-
- /*
- Add a given line, representing a label, to the symbol table.
-
- Return NULL on success and an ErrorInfo object on failure (e.g. in the case
- of duplicate labels, or labels sharing names with registers/conditions).
- */
- static ErrorInfo* add_label_to_table(
- ASMSymbolTable *symtable, const ASMLine *line, size_t offset, int8_t slot)
- {
- if (line->length - 1 >= MAX_SYMBOL_SIZE)
- return error_info_create(line, ET_SYMBOL, ED_SYM_TOO_LONG);
-
- ASMArgParseInfo info = {.arg = line->data, .size = line->length - 1};
- ASMArgRegister reg;
- if (argparse_register(®, info))
- return error_info_create(line, ET_SYMBOL, ED_SYM_IS_REGISTER);
-
- ASMArgCondition cond;
- if (argparse_condition(&cond, info))
- return error_info_create(line, ET_SYMBOL, ED_SYM_IS_CONDITION);
-
- char *symbol = cr_strndup(line->data, line->length - 1);
- const ASMSymbol *current = asm_symtable_find(symtable, symbol);
- if (current) {
- ErrorInfo *ei = error_info_create(line, ET_SYMBOL, ED_SYM_DUPE_LABELS);
- error_info_append(ei, current->line);
- free(symbol);
- return ei;
- }
-
- ASMSymbol *label = cr_malloc(sizeof(ASMSymbol));
- label->offset = map_into_slot(offset,
- (slot >= 0) ? slot : default_bank_slot(offset / MMU_ROM_BANK_SIZE));
- label->symbol = symbol;
- label->line = line;
- asm_symtable_insert(symtable, label);
- return NULL;
- }
-
- /*
- Handle a define directive by adding an entry to the define table.
-
- Return NULL on success and an ErrorInfo object on failure.
- */
- static ErrorInfo* handle_define_directive(
- const ASMLine *line, ASMDefineTable *deftab)
- {
- if (!DIRECTIVE_HAS_ARG(line, DIR_DEFINE))
- return error_info_create(line, ET_PREPROC, ED_PP_NO_ARG);
-
- size_t start = DIRECTIVE_OFFSET(line, DIR_DEFINE) + 1, i;
- for (i = start; i < line->length; i++) {
- if (!is_valid_symbol_char(line->data[i], i == start)) {
- if (line->data[i] == ' ' && i > start) {
- i++;
- break;
- }
- return error_info_create(line, ET_PREPROC, ED_PP_BAD_ARG);
- }
- }
-
- if (i >= line->length) // Missing value for define
- return error_info_create(line, ET_PREPROC, ED_PP_NO_ARG);
-
- const char *key = line->data + start;
- size_t keylen = i - start - 1;
-
- const ASMDefine *current = asm_deftable_find(deftab, key, keylen);
- if (current) {
- ErrorInfo *ei = error_info_create(line, ET_PREPROC, ED_PP_DUPLICATE);
- error_info_append(ei, current->line);
- return ei;
- }
-
- ASMArgImmediate imm;
- ASMArgParseInfo info = {
- .arg = line->data + i, .size = line->length - i, .deftable = deftab};
- if (!argparse_immediate(&imm, info) || imm.is_label)
- return error_info_create(line, ET_PREPROC, ED_PP_BAD_ARG);
-
- ASMDefine *define = cr_malloc(sizeof(ASMDefine));
- define->name = cr_strndup(key, keylen);
- define->value = imm;
- define->line = line;
- asm_deftable_insert(deftab, define);
- return NULL;
- }
-
- /*
- Handle an undefine directive by remove an entry in the define table.
-
- Return NULL on success and an ErrorInfo object on failure.
- */
- static ErrorInfo* handle_undef_directive(
- const ASMLine *line, ASMDefineTable *deftab)
- {
- if (!DIRECTIVE_HAS_ARG(line, DIR_UNDEF))
- return error_info_create(line, ET_PREPROC, ED_PP_NO_ARG);
-
- size_t offset = DIRECTIVE_OFFSET(line, DIR_UNDEF) + 1;
- const char *arg = line->data + offset;
- size_t size = line->length - offset, i;
-
- for (i = 0; i < size; i++) {
- if (!is_valid_symbol_char(arg[i], i == 0))
- return error_info_create(line, ET_PREPROC, ED_PP_BAD_ARG);
- }
-
- asm_deftable_remove(deftab, arg, size);
- return NULL;
- }
-
- /*
- Handle an origin directive by updating the offset.
-
- Return NULL on success and an ErrorInfo object on failure.
- */
- static ErrorInfo* handle_origin_directive(const ASMLine *line, size_t *offset)
- {
- if (!DIRECTIVE_HAS_ARG(line, DIR_ORIGIN))
- return error_info_create(line, ET_PREPROC, ED_PP_NO_ARG);
-
- uint32_t arg;
- if (!dparse_uint32_t(&arg, line, DIR_ORIGIN))
- return error_info_create(line, ET_PREPROC, ED_PP_BAD_ARG);
-
- if (arg >= ROM_SIZE_MAX)
- return error_info_create(line, ET_PREPROC, ED_PP_ARG_RANGE);
-
- *offset = arg;
- return NULL;
- }
-
- /*
- Handle a block directive by updating the offset and slot.
-
- Return NULL on success and an ErrorInfo object on failure.
- */
- static ErrorInfo* handle_block_directive(
- const ASMLine *line, size_t *offset, ASMSlotInfo *si)
- {
- if (!DIRECTIVE_HAS_ARG(line, DIR_BLOCK))
- return error_info_create(line, ET_PREPROC, ED_PP_NO_ARG);
-
- uint8_t *args, bank, slot;
- size_t dir_offset = DIRECTIVE_OFFSET(line, DIR_BLOCK) + 1, nargs;
-
- if (!parse_bytes(&args, &nargs, line->data + dir_offset,
- line->length - dir_offset))
- return error_info_create(line, ET_PREPROC, ED_PP_BAD_ARG);
- if (nargs < 1 || nargs > 2)
- return free(args), error_info_create(line, ET_PREPROC, ED_PP_BAD_ARG);
-
- bank = args[0];
- slot = nargs == 2 ? args[1] : default_bank_slot(bank);
- free(args);
-
- if (bank >= MMU_NUM_ROM_BANKS || slot >= MMU_NUM_SLOTS)
- return error_info_create(line, ET_PREPROC, ED_PP_ARG_RANGE);
- if (bank == 0 && slot != 0)
- return error_info_create(line, ET_LAYOUT, ED_LYT_BLOCK0);
- if (si->slots[bank] >= 0 && si->slots[bank] != slot) {
- ErrorInfo *ei = error_info_create(line, ET_LAYOUT, ED_LYT_SLOTS);
- error_info_append(ei, si->lines[bank]);
- return ei;
- }
-
- *offset = bank * MMU_ROM_BANK_SIZE;
- si->slots[bank] = slot;
- if (!si->lines[bank])
- si->lines[bank] = line;
- return NULL;
- }
-
/*
    Parse the argument of a space directive, which fills a region with a
    single repeated byte.

    The argument is one or two byte values: the region length and, optionally,
    the fill byte (zero when omitted). On success *result receives a newly
    allocated buffer and *length its size; on failure neither is written.
*/
static bool parse_space(
    uint8_t **result, size_t *length, const char *arg, ssize_t size)
{
    uint8_t *spec;
    size_t nspec;
    if (!parse_bytes(&spec, &nspec, arg, size))
        return false;

    bool ok = (nspec == 1 || nspec == 2);
    if (ok) {
        uint8_t fill = (nspec == 2) ? spec[1] : 0;

        *length = spec[0];
        *result = cr_malloc(sizeof(uint8_t) * (*length));
        memset(*result, fill, *length);
    }

    free(spec);
    return ok;
}
-
/*
    Parse a string exactly as parse_string() does, then append a terminating
    null byte.

    On success *length counts the terminator as well.
*/
static bool parse_cstring(
    char **result, size_t *length, const char *arg, ssize_t size)
{
    if (!parse_string(result, length, arg, size))
        return false;

    size_t terminated = *length + 1;
    char *grown = cr_realloc(*result, sizeof(char) * terminated);
    grown[terminated - 1] = '\0';

    *result = grown;
    *length = terminated;
    return true;
}
-
- /*
- Parse data encoded in a line into an ASMData object.
-
- On success, return NULL and store the instruction in *data_ptr. On failure,
- return an ErrorInfo object; *data_ptr is not modified.
- */
- static ErrorInfo* parse_data(
- const ASMLine *line, ASMData **data_ptr, size_t offset)
- {
- const char *directive;
- parser_func parser;
-
- if (IS_DIRECTIVE(line, DIR_BYTE)) {
- directive = DIR_BYTE;
- parser = parse_bytes;
- } else if (IS_DIRECTIVE(line, DIR_SPACE)) {
- directive = DIR_SPACE;
- parser = parse_space;
- } else if (IS_DIRECTIVE(line, DIR_ASCII)) {
- directive = DIR_ASCII;
- parser = (parser_func) parse_string;
- } else if (IS_DIRECTIVE(line, DIR_ASCIZ)) {
- directive = DIR_ASCIZ;
- parser = (parser_func) parse_cstring;
- } else if (IS_DIRECTIVE(line, DIR_ASCIIZ)) {
- directive = DIR_ASCIIZ;
- parser = (parser_func) parse_cstring;
- } else {
- return error_info_create(line, ET_PREPROC, ED_PP_UNKNOWN);
- }
-
- if (!DIRECTIVE_HAS_ARG(line, directive))
- return error_info_create(line, ET_PREPROC, ED_PP_NO_ARG);
-
- size_t dir_offset = DIRECTIVE_OFFSET(line, directive) + 1;
- const char *arg = line->data + dir_offset;
- size_t arglen = line->length - dir_offset;
-
- ASMData *data = cr_malloc(sizeof(ASMData));
- data->loc.offset = offset;
- data->next = NULL;
-
- if (!parser(&data->bytes, &data->loc.length, arg, arglen)) {
- free(data);
- return error_info_create(line, ET_PREPROC, ED_PP_BAD_ARG);
- }
-
- *data_ptr = data;
- return NULL;
- }
-
- /*
- Parse an instruction encoded in a line into an ASMInstruction object.
-
- On success, return NULL and store the instruction in *inst_ptr. On failure,
- return an ErrorInfo object; *inst_ptr is not modified.
- */
- static ErrorInfo* parse_instruction(
- const ASMLine *line, ASMInstruction **inst_ptr, size_t offset,
- ASMDefineTable *deftab)
- {
- char mnemonic[MAX_MNEMONIC_SIZE] = {0};
- size_t i = 0;
- while (i < line->length) {
- char c = line->data[i];
- if (c == ' ')
- break;
- if (i >= MAX_MNEMONIC_SIZE)
- return error_info_create(line, ET_PARSER, ED_PS_OP_TOO_LONG);
- if ((c < 'a' || c > 'z') && (c < '0' || c > '9'))
- return error_info_create(line, ET_PARSER, ED_PS_OP_INVALID);
- mnemonic[i++] = c;
- }
-
- if (i < MIN_MNEMONIC_SIZE)
- return error_info_create(line, ET_PARSER, ED_PS_OP_TOO_SHORT);
-
- if (i + 1 < line->length)
- i++; // Advance past space
-
- uint8_t *bytes;
- size_t arglen = line->length - i, length;
- char *argstart = arglen > 0 ? line->data + i : NULL, *symbol = NULL;
-
- ASMInstParser parser = get_inst_parser(mnemonic);
- if (!parser)
- return error_info_create(line, ET_PARSER, ED_PS_OP_UNKNOWN);
-
- ASMArgParseInfo ai = {.arg = argstart, .size = arglen, .deftable = deftab};
- ASMErrorDesc edesc = parser(&bytes, &length, &symbol, ai);
- if (edesc != ED_NONE)
- return error_info_create(line, ET_PARSER, edesc);
-
- ASMInstruction *inst = cr_malloc(sizeof(ASMInstruction));
- inst->loc.offset = offset;
- inst->loc.length = length;
- inst->bytes = bytes;
- inst->symbol = symbol;
- inst->line = line;
- inst->next = NULL;
-
- *inst_ptr = inst;
- return NULL;
- }
-
- /*
- Check if the given object location is legal.
-
- Checks include ROM size bounding, overlapping with existing objects, and
- block-crossing assuming the .cross_blocks directive has not been specified.
-
- On success, return NULL and add the location to the overlap table.
- On failure, return an ErrorInfo object.
- */
- static ErrorInfo* check_layout(
- ASMLayoutInfo *li, const ASMLocation *loc, const ASMLine *line)
- {
- const ASMLine *clash = NULL, *clash_origin;
- if (loc->offset + loc->length > li->size) {
- clash = &bounds_sentinel;
- } else {
- for (size_t i = 0; i < loc->length; i++) {
- if (li->overlap_table[loc->offset + i]) {
- clash = li->overlap_table[loc->offset + i];
- clash_origin = li->overlap_origins[loc->offset + i];
- break;
- }
- }
- }
-
- if (clash) {
- ErrorInfo *ei = error_info_create(line, ET_LAYOUT,
- (clash == &header_sentinel) ? ED_LYT_OVERLAP_HEAD :
- (clash == &bounds_sentinel) ? ED_LYT_BOUNDS : ED_LYT_OVERLAP);
-
- if (li->origin)
- error_info_append(ei, li->origin);
- if (clash != &header_sentinel && clash != &bounds_sentinel) {
- error_info_append(ei, clash);
- if (clash_origin)
- error_info_append(ei, clash_origin);
- }
- return ei;
- }
-
- uint8_t bank = (loc->offset + loc->length - 1) / MMU_ROM_BANK_SIZE;
- if (bank != li->bank && !li->cross_blocks) {
- ErrorInfo *ei = error_info_create(line, ET_LAYOUT, ED_LYT_BLOCK_CROSS);
- if (li->origin)
- error_info_append(ei, li->origin);
- return ei;
- }
-
- for (size_t i = 0; i < loc->length; i++) {
- li->overlap_table[loc->offset + i] = line;
- li->overlap_origins[loc->offset + i] = li->origin;
- }
- return NULL;
- }
-
- /*
- Tokenize ASMLines into ASMInstructions and ASMData.
-
- NULL is returned on success and an ErrorInfo object is returned on failure.
- state->instructions, state->data, and state->symtable may or may not be
- modified regardless of success.
- */
- ErrorInfo* tokenize(AssemblerState *state)
- {
- ErrorInfo *ei = NULL;
- ASMLayoutInfo li;
- ASMSlotInfo si = {.lines = {0}};
- ASMDefineTable *deftab = asm_deftable_new();
- ASMInstruction dummy_inst = {.next = NULL}, *inst, *prev_inst = &dummy_inst;
- ASMData dummy_data = {.next = NULL}, *data, *prev_data = &dummy_data;
- const ASMLine *line = state->lines;
- size_t offset = 0;
-
- DEBUG("Running tokenizer")
- init_layout_info(&li, state);
- memset(si.slots, -1, MMU_NUM_ROM_BANKS);
-
- while (line) {
- if (line->is_label) {
- if (offset >= li.size) {
- ei = error_info_create(line, ET_LAYOUT, ED_LYT_BOUNDS);
- goto cleanup;
- }
- int8_t slot = si.slots[offset / MMU_NUM_ROM_BANKS];
- if ((ei = add_label_to_table(state->symtable, line, offset, slot)))
- goto cleanup;
- }
- else if (IS_LOCAL_DIRECTIVE(line)) {
- if (IS_DIRECTIVE(line, DIR_DEFINE)) {
- if ((ei = handle_define_directive(line, deftab)))
- goto cleanup;
- }
- else if (IS_DIRECTIVE(line, DIR_UNDEF)) {
- if ((ei = handle_undef_directive(line, deftab)))
- goto cleanup;
- }
- else if (IS_DIRECTIVE(line, DIR_ORIGIN)) {
- if ((ei = handle_origin_directive(line, &offset)))
- goto cleanup;
-
- li.origin = line;
- li.bank = offset / MMU_ROM_BANK_SIZE;
- }
- else if (IS_DIRECTIVE(line, DIR_BLOCK)) {
- if ((ei = handle_block_directive(line, &offset, &si)))
- goto cleanup;
-
- li.origin = line;
- li.bank = offset / MMU_ROM_BANK_SIZE;
- }
- else {
- if ((ei = parse_data(line, &data, offset)))
- goto cleanup;
-
- offset += data->loc.length;
- prev_data->next = data;
- prev_data = data;
-
- if ((ei = check_layout(&li, &data->loc, line)))
- goto cleanup;
- }
- }
- else {
- if ((ei = parse_instruction(line, &inst, offset, deftab)))
- goto cleanup;
-
- offset += inst->loc.length;
- prev_inst->next = inst;
- prev_inst = inst;
-
- if ((ei = check_layout(&li, &inst->loc, line)))
- goto cleanup;
- }
- line = line->next;
- }
-
- cleanup:
- state->instructions = dummy_inst.next;
- state->data = dummy_data.next;
- free_layout_info(&li);
- asm_deftable_free(deftab);
- return ei;
- }
|