From b8ad05578fb0c24484ae01b2aa606602534edb36 Mon Sep 17 00:00:00 2001 From: Ben Kurtovic Date: Fri, 24 Apr 2015 19:15:59 -0500 Subject: [PATCH] Refactor out tokenizer; start implementing paging knowledge. --- src/assembler.c | 186 +-------------------------------- src/assembler/directives.h | 10 +- src/assembler/errors.c | 1 + src/assembler/errors.h | 1 + src/assembler/parse_util.c | 12 +++ src/assembler/parse_util.h | 1 + src/assembler/preprocessor.c | 4 + src/assembler/state.c | 1 + src/assembler/state.h | 1 + src/assembler/tokenizer.c | 241 +++++++++++++++++++++++++++++++++++++++++++ src/assembler/tokenizer.h | 11 ++ src/disassembler.c | 3 + 12 files changed, 284 insertions(+), 188 deletions(-) create mode 100644 src/assembler/tokenizer.c create mode 100644 src/assembler/tokenizer.h diff --git a/src/assembler.c b/src/assembler.c index ffc88a2..98e93d0 100644 --- a/src/assembler.c +++ b/src/assembler.c @@ -4,198 +4,15 @@ #include #include "assembler.h" -#include "assembler/directives.h" #include "assembler/errors.h" #include "assembler/io.h" -#include "assembler/parse_util.h" #include "assembler/preprocessor.h" #include "assembler/state.h" +#include "assembler/tokenizer.h" #include "logging.h" #include "rom.h" #include "util.h" -/* Sentinel values for overlap table */ -const ASMLine header_sentinel, bounds_sentinel; - -/* - Add a given line, representing a label, to the symbol table. - - Return NULL on success and an ErrorInfo object on failure (in the case of - duplicate labels). 
-*/ -static ErrorInfo* add_label_to_table( - ASMSymbolTable *symtable, const ASMLine *line, size_t offset) -{ - char *symbol = strndup(line->data, line->length - 1); - if (!symbol) - OUT_OF_MEMORY() - - const ASMSymbol *current = asm_symtable_find(symtable, symbol); - if (current) { - ErrorInfo *ei = error_info_create(line, ET_SYMBOL, ED_SYM_DUPE_LABELS); - error_info_append(ei, current->line); - return ei; - } - - ASMSymbol *label = malloc(sizeof(ASMSymbol)); - if (!label) - OUT_OF_MEMORY() - - // TODO: don't assume all ROM gets mapped to slot 2 - label->offset = (offset >= 0xC000) ? ((offset & 0x3FFF) + 0x8000) : offset; - label->symbol = symbol; - label->line = line; - asm_symtable_insert(symtable, label); - return NULL; -} - -/* - Parse data encoded in a line into an ASMData object. - - On success, return NULL and store the instruction in *data_ptr. On failure, - return an ErrorInfo object; *data_ptr is not modified. -*/ -static ErrorInfo* parse_data( - const ASMLine *line, ASMData **data_ptr, size_t offset) -{ - // TODO - DEBUG("parse_data(): %.*s", (int) line->length, line->data) - - return error_info_create(line, ET_PARSER, ED_PARSE_SYNTAX); -} - -/* - Parse an instruction encoded in a line into an ASMInstruction object. - - On success, return NULL and store the instruction in *inst_ptr. On failure, - return an ErrorInfo object; *inst_ptr is not modified. -*/ -static ErrorInfo* parse_instruction( - const ASMLine *line, ASMInstruction **inst_ptr, size_t offset) -{ - // TODO - DEBUG("parse_instruction(): %.*s", (int) line->length, line->data) - - return error_info_create(line, ET_PARSER, ED_PARSE_SYNTAX); -} - -/* - Check if the given location overlaps with any existing objects. - - On success, return NULL and add the location to the overlap table. - On failure, return an ErrorInfo object. 
-*/ -static ErrorInfo* check_layout( - const ASMLine **overlap_table, size_t size, const ASMLocation *loc, - const ASMLine *line, const ASMLine *origin) -{ - const ASMLine *clash = NULL; - - if (loc->offset + loc->length > size) { - clash = &bounds_sentinel; - } else { - for (size_t i = 0; i < loc->length; i++) { - if (overlap_table[loc->offset + i]) { - clash = overlap_table[loc->offset + i]; - break; - } - } - } - - if (clash) { - ErrorInfo *ei = error_info_create(line, ET_LAYOUT, - (clash == &header_sentinel) ? ED_LYT_OVERLAP_HEAD : - (clash == &bounds_sentinel) ? ED_LYT_BOUNDS : ED_LYT_OVERLAP); - - if (origin) - error_info_append(ei, origin); - if (clash != &header_sentinel && clash != &bounds_sentinel) - error_info_append(ei, clash); - return ei; - } - - for (size_t i = 0; i < loc->length; i++) - overlap_table[loc->offset + i] = line; - return NULL; -} - -/* - Tokenize ASMLines into ASMInstructions and ASMData. - - NULL is returned on success and an ErrorInfo object is returned on failure. - state->instructions, state->data, and state->symtable may or may not be - modified regardless of success. -*/ -static ErrorInfo* tokenize(AssemblerState *state) -{ - size_t size = state->rom_size ? 
state->rom_size : ROM_SIZE_MAX; - const ASMLine **overlap_table = calloc(size, sizeof(const ASMLine*)); - if (!overlap_table) - OUT_OF_MEMORY() - - ErrorInfo *ei = NULL; - ASMInstruction dummy_inst = {.next = NULL}, *inst, *prev_inst = &dummy_inst; - ASMData dummy_data = {.next = NULL}, *data, *prev_data = &dummy_data; - const ASMLine *line = state->lines, *origin = NULL; - size_t offset = 0; - - for (size_t i = 0; i < HEADER_SIZE; i++) - overlap_table[state->header.offset + i] = &header_sentinel; - - while (line) { - if (line->is_label) { - if ((ei = add_label_to_table(state->symtable, line, offset))) - goto cleanup; - } - else if (IS_LOCAL_DIRECTIVE(line)) { - if (IS_DIRECTIVE(line, DIR_ORIGIN)) { - if (!DIRECTIVE_HAS_ARG(line, DIR_ORIGIN)) { - ei = error_info_create(line, ET_PREPROC, ED_PP_NO_ARG); - goto cleanup; - } - - uint32_t arg; - if (!dparse_uint32_t(&arg, line, DIR_ORIGIN)) { - ei = error_info_create(line, ET_PREPROC, ED_PP_BAD_ARG); - goto cleanup; - } - - offset = arg; - origin = line; - } - else { - if ((ei = parse_data(line, &data, offset))) - goto cleanup; - - offset += data->loc.length; - prev_data->next = data; - prev_data = data; - - if ((ei = check_layout(overlap_table, size, &data->loc, line, origin))) - goto cleanup; - } - } - else { - if ((ei = parse_instruction(line, &inst, offset))) - goto cleanup; - - offset += inst->loc.length; - prev_inst->next = inst; - prev_inst = inst; - - if ((ei = check_layout(overlap_table, size, &inst->loc, line, origin))) - goto cleanup; - } - line = line->next; - } - - cleanup: - state->instructions = dummy_inst.next; - state->data = dummy_data.next; - free(overlap_table); - return ei; -} - /* Return the smallest ROM size that can contain the given address. 
@@ -203,6 +20,7 @@ static ErrorInfo* tokenize(AssemblerState *state) */ static size_t bounding_rom_size(size_t size) { + size--; size |= size >> 1; size |= size >> 2; size |= size >> 4; diff --git a/src/assembler/directives.h b/src/assembler/directives.h index 2d974d0..dc084ec 100644 --- a/src/assembler/directives.h +++ b/src/assembler/directives.h @@ -6,7 +6,7 @@ #include #define DIRECTIVE_MARKER '.' -#define NUM_DIRECTIVES 14 +#define NUM_DIRECTIVES 16 #define DIR_INCLUDE ".include" @@ -18,8 +18,10 @@ #define DIR_ROM_VERSION ".rom_version" #define DIR_ROM_REGION ".rom_region" #define DIR_ROM_DECLSIZE ".rom_declsize" +#define DIR_CROSS_BLOCKS ".cross_blocks" #define DIR_ORIGIN ".org" +#define DIR_BLOCK ".block" #define DIR_BYTE ".byte" #define DIR_ASCII ".ascii" #define DIR_ASCIZ ".asciz" @@ -33,9 +35,9 @@ (!DIRECTIVE_HAS_ARG(line, d) || (line)->data[strlen(d)] == ' ')) #define IS_LOCAL_DIRECTIVE(line) \ - (IS_DIRECTIVE(line, DIR_ORIGIN) || IS_DIRECTIVE(line, DIR_BYTE) || \ - IS_DIRECTIVE(line, DIR_ASCII) || IS_DIRECTIVE(line, DIR_ASCIZ) || \ - IS_DIRECTIVE(line, DIR_ASCIIZ)) + (IS_DIRECTIVE(line, DIR_ORIGIN) || IS_DIRECTIVE(line, DIR_BLOCK) || \ + IS_DIRECTIVE(line, DIR_BYTE) || IS_DIRECTIVE(line, DIR_ASCII) || \ + IS_DIRECTIVE(line, DIR_ASCIZ) || IS_DIRECTIVE(line, DIR_ASCIIZ)) #define DIRECTIVE_OFFSET(line, d) \ (DIRECTIVE_HAS_ARG(line, d) ? 
strlen(d) : 0) diff --git a/src/assembler/errors.c b/src/assembler/errors.c index 8d8e26c..57038d3 100644 --- a/src/assembler/errors.c +++ b/src/assembler/errors.c @@ -34,6 +34,7 @@ static const char *asm_error_descs[] = { "header offset exceeds given ROM size", // ED_LYT_HEADER_RANGE "declared ROM size in header exceeds actual size", // ED_LYT_DECLARE_RANGE + "block zero cannot be mapped into a nonzero slot", // ED_LYT_BLOCK0 "location is out of bounds for the ROM size", // ED_LYT_BOUNDS "location overlaps with instruction or data", // ED_LYT_OVERLAP "location overlaps with ROM header", // ED_LYT_OVERLAP_HEAD diff --git a/src/assembler/errors.h b/src/assembler/errors.h index 94b370d..ab5b907 100644 --- a/src/assembler/errors.h +++ b/src/assembler/errors.h @@ -30,6 +30,7 @@ typedef enum { ED_LYT_HEADER_RANGE, ED_LYT_DECLARE_RANGE, + ED_LYT_BLOCK0, ED_LYT_BOUNDS, ED_LYT_OVERLAP, ED_LYT_OVERLAP_HEAD, diff --git a/src/assembler/parse_util.c b/src/assembler/parse_util.c index 12a68e3..9e61411 100644 --- a/src/assembler/parse_util.c +++ b/src/assembler/parse_util.c @@ -130,6 +130,18 @@ bool parse_string(char **result, size_t *length, const char *arg, ssize_t size) } /* + Read in a space-separated sequence of bytes and store it in *result. + + *length is also updated to the number of bytes in the array. *result must + be free()'d when finished. NOTE(review): currently a stub that always returns false and writes neither *result nor *length. +*/ +bool parse_bytes(uint8_t **result, size_t *length, const char *arg, ssize_t size) +{ + // TODO + return false; +} + +/* Read in a boolean argument from the given line and store it in *result. 
*/ DIRECTIVE_PARSE_FUNC(bool, bool) diff --git a/src/assembler/parse_util.h b/src/assembler/parse_util.h index 94dbdca..d9311f2 100644 --- a/src/assembler/parse_util.h +++ b/src/assembler/parse_util.h @@ -15,6 +15,7 @@ bool parse_bool(bool*, const char*, ssize_t); bool parse_uint32_t(uint32_t*, const char*, ssize_t); bool parse_string(char**, size_t*, const char*, ssize_t); +bool parse_bytes(uint8_t**, size_t*, const char*, ssize_t); bool dparse_bool(bool*, const ASMLine*, const char*); bool dparse_uint32_t(uint32_t*, const ASMLine*, const char*); diff --git a/src/assembler/preprocessor.c b/src/assembler/preprocessor.c index 64f07a0..ab0b368 100644 --- a/src/assembler/preprocessor.c +++ b/src/assembler/preprocessor.c @@ -479,6 +479,10 @@ ErrorInfo* preprocess(AssemblerState *state, const LineBuffer *source) SAVE_LINE(rom_declsize_line) END_DIRECTIVE + BEGIN_DIRECTIVE(DIR_CROSS_BLOCKS, bool, state->cross_blocks, false) + USE_PARSER(bool) + END_DIRECTIVE + END_DIRECTIVE_BLOCK // Remove directive from lines, and schedule it for deletion: diff --git a/src/assembler/state.c b/src/assembler/state.c index 307b65b..8cc21eb 100644 --- a/src/assembler/state.c +++ b/src/assembler/state.c @@ -19,6 +19,7 @@ void state_init(AssemblerState *state) state->header.region = DEFAULT_REGION; state->header.rom_size = DEFAULT_DECLSIZE; state->optimizer = false; + state->cross_blocks = false; state->rom_size = 0; state->lines = NULL; diff --git a/src/assembler/state.h b/src/assembler/state.h index 3e71dfd..91478e0 100644 --- a/src/assembler/state.h +++ b/src/assembler/state.h @@ -78,6 +78,7 @@ typedef struct { typedef struct { ASMHeaderInfo header; bool optimizer; + bool cross_blocks; size_t rom_size; ASMLine *lines; ASMInclude *includes; diff --git a/src/assembler/tokenizer.c b/src/assembler/tokenizer.c new file mode 100644 index 0000000..3b899e6 --- /dev/null +++ b/src/assembler/tokenizer.c @@ -0,0 +1,241 @@ +/* Copyright (C) 2014-2015 Ben Kurtovic + Released under the terms of the MIT 
License. See LICENSE for details. */ + +#include +#include + +#include "tokenizer.h" +#include "directives.h" +#include "parse_util.h" +#include "../logging.h" +#include "../rom.h" + +/* Sentinel values for overlap table; check_layout() only compares table entries against their addresses */ +const ASMLine header_sentinel, bounds_sentinel; + +/* + Add a given line, representing a label, to the symbol table. + + Return NULL on success and an ErrorInfo object on failure (in the case of + duplicate labels). The symbol name is the line text minus its last character. With slot >= 0 the stored offset is (offset & 0x3FFF) + slot * 0x4000; otherwise offsets >= 0xC000 are folded to (offset & 0x3FFF) + 0x8000 and lower offsets are stored unchanged. NOTE(review): the strndup() result is not freed on the duplicate-label return path. +*/ +static ErrorInfo* add_label_to_table( + ASMSymbolTable *symtable, const ASMLine *line, size_t offset, ssize_t slot) +{ + char *symbol = strndup(line->data, line->length - 1); + if (!symbol) + OUT_OF_MEMORY() + + const ASMSymbol *current = asm_symtable_find(symtable, symbol); + if (current) { + ErrorInfo *ei = error_info_create(line, ET_SYMBOL, ED_SYM_DUPE_LABELS); + error_info_append(ei, current->line); + return ei; + } + + ASMSymbol *label = malloc(sizeof(ASMSymbol)); + if (!label) + OUT_OF_MEMORY() + + size_t block_offset = offset & 0x3FFF; + label->offset = slot >= 0 ? (block_offset + slot * 0x4000) : + (offset >= 0xC000 ? (block_offset + 0x8000) : offset); + label->symbol = symbol; + label->line = line; + asm_symtable_insert(symtable, label); + return NULL; +} + +/* + Handle an origin directive by updating the offset and (maybe) the slot. + + Return NULL on success and an ErrorInfo object on failure. ED_PP_NO_ARG is reported when the directive has no argument and ED_PP_BAD_ARG when dparse_uint32_t() rejects it. NOTE(review): slot is accepted but never written here yet; only the TODO in the body mentions it. +*/ +static ErrorInfo* handle_origin_directive( + const ASMLine *line, size_t *offset, ssize_t *slot) +{ + if (!DIRECTIVE_HAS_ARG(line, DIR_ORIGIN)) + return error_info_create(line, ET_PREPROC, ED_PP_NO_ARG); + + uint32_t arg; + if (!dparse_uint32_t(&arg, line, DIR_ORIGIN)) + return error_info_create(line, ET_PREPROC, ED_PP_BAD_ARG); + + *offset = arg; + // TODO: if different block, *slot <-- slot lookup table for this block + return NULL; +} + +/* + Handle a block directive by updating the offset and slot. The first byte argument is the block number (must be < 64) and the optional second one is the slot (must be <= 2); offset becomes block * 16 KiB and slot becomes the second argument, or -1 when it is omitted. Block 0 with a nonzero slot is rejected with ED_LYT_BLOCK0. + + Return NULL on success and an ErrorInfo object on failure. 
+*/ +static ErrorInfo* handle_block_directive( + const ASMLine *line, size_t *offset, ssize_t *slot) +{ + if (!DIRECTIVE_HAS_ARG(line, DIR_BLOCK)) + return error_info_create(line, ET_PREPROC, ED_PP_NO_ARG); + + uint8_t *args; + size_t dir_offset = DIRECTIVE_OFFSET(line, DIR_BLOCK) + 1, nargs; /* +1 skips the separator that follows the directive name */ + + if (!parse_bytes(&args, &nargs, line->data + dir_offset, + line->length - dir_offset)) + return error_info_create(line, ET_PREPROC, ED_PP_BAD_ARG); + + if (nargs < 1 || nargs > 2 || args[0] >= 64 || (nargs == 2 && args[1] > 2)) + return error_info_create(line, ET_PREPROC, ED_PP_BAD_ARG); /* NOTE(review): args is not freed on this path */ + + if (nargs == 2 && args[0] == 0 && args[1] != 0) + return error_info_create(line, ET_LAYOUT, ED_LYT_BLOCK0); /* NOTE(review): args is not freed on this path either */ + + *offset = args[0] * (16 << 10); /* 16 KiB per block */ + *slot = nargs == 2 ? args[1] : -1; /* -1 when no slot argument was given */ + free(args); + return NULL; +} + +/* + Parse data encoded in a line into an ASMData object. + + On success, return NULL and store the data in *data_ptr. On failure, + return an ErrorInfo object; *data_ptr is not modified. NOTE(review): currently a stub; it invokes DEBUG() with the line text and always returns an ET_PARSER / ED_PARSE_SYNTAX error, so *data_ptr is never written. +*/ +static ErrorInfo* parse_data( + const ASMLine *line, ASMData **data_ptr, size_t offset) +{ + // TODO + DEBUG("parse_data(): %.*s", (int) line->length, line->data) + + return error_info_create(line, ET_PARSER, ED_PARSE_SYNTAX); +} + +/* + Parse an instruction encoded in a line into an ASMInstruction object. + + On success, return NULL and store the instruction in *inst_ptr. On failure, + return an ErrorInfo object; *inst_ptr is not modified. NOTE(review): currently a stub; it invokes DEBUG() with the line text and always returns an ET_PARSER / ED_PARSE_SYNTAX error, so *inst_ptr is never written. +*/ +static ErrorInfo* parse_instruction( + const ASMLine *line, ASMInstruction **inst_ptr, size_t offset) +{ + // TODO + DEBUG("parse_instruction(): %.*s", (int) line->length, line->data) + + return error_info_create(line, ET_PARSER, ED_PARSE_SYNTAX); +} + +/* + Check if the given location overlaps with any existing objects, or extends past the end of the table (size entries, reported as ED_LYT_BOUNDS). + + On success, return NULL and add the location to the overlap table. + On failure, return an ErrorInfo object. 
+*/ +static ErrorInfo* check_layout( + const ASMLine **overlap_table, size_t size, const ASMLocation *loc, + const ASMLine *line, const ASMLine *origin) +{ + // TODO: never let boundaries cross without state->cross_blocks + const ASMLine *clash = NULL; + + if (loc->offset + loc->length > size) { + clash = &bounds_sentinel; /* location runs past the end of the table */ + } else { + for (size_t i = 0; i < loc->length; i++) { + if (overlap_table[loc->offset + i]) { + clash = overlap_table[loc->offset + i]; /* first already-claimed entry wins */ + break; + } + } + } + + if (clash) { + ErrorInfo *ei = error_info_create(line, ET_LAYOUT, + (clash == &header_sentinel) ? ED_LYT_OVERLAP_HEAD : + (clash == &bounds_sentinel) ? ED_LYT_BOUNDS : ED_LYT_OVERLAP); + + if (origin) + error_info_append(ei, origin); /* origin is the last .org/.block line seen by tokenize(), if any */ + if (clash != &header_sentinel && clash != &bounds_sentinel) + error_info_append(ei, clash); /* a real line: report it as the other party of the overlap */ + return ei; + } + + for (size_t i = 0; i < loc->length; i++) + overlap_table[loc->offset + i] = line; /* no clash: claim every entry of the location for this line */ + return NULL; +} + +/* + Tokenize ASMLines into ASMInstructions and ASMData. + + NULL is returned on success and an ErrorInfo object is returned on failure. + state->instructions, state->data, and state->symtable may or may not be + modified regardless of success. Walks state->lines once: labels go to the symbol table, .org/.block directives move the current offset (and slot), other local directives are parsed as data and every remaining line as an instruction; each parsed item is checked against an overlap table pre-seeded with the header region. +*/ +ErrorInfo* tokenize(AssemblerState *state) +{ + size_t size = state->rom_size ? 
state->rom_size : ROM_SIZE_MAX; + const ASMLine **overlap_table = calloc(size, sizeof(const ASMLine*)); /* one entry per ROM offset; NULL means unclaimed */ + if (!overlap_table) + OUT_OF_MEMORY() + + ErrorInfo *ei = NULL; + ASMInstruction dummy_inst = {.next = NULL}, *inst, *prev_inst = &dummy_inst; + ASMData dummy_data = {.next = NULL}, *data, *prev_data = &dummy_data; + const ASMLine *line = state->lines, *origin = NULL; + size_t offset = 0; + ssize_t slot = -1; + + for (size_t i = 0; i < HEADER_SIZE; i++) + overlap_table[state->header.offset + i] = &header_sentinel; /* NOTE(review): no check here that header.offset + HEADER_SIZE <= size; presumably validated earlier (cf. ED_LYT_HEADER_RANGE) -- confirm */ + + while (line) { + if (line->is_label) { + if ((ei = add_label_to_table(state->symtable, line, offset, slot))) + goto cleanup; + } + else if (IS_LOCAL_DIRECTIVE(line)) { + if (IS_DIRECTIVE(line, DIR_ORIGIN)) { + if ((ei = handle_origin_directive(line, &offset, &slot))) + goto cleanup; + origin = line; + } + else if (IS_DIRECTIVE(line, DIR_BLOCK)) { + if ((ei = handle_block_directive(line, &offset, &slot))) + goto cleanup; + origin = line; + } + else { + if ((ei = parse_data(line, &data, offset))) + goto cleanup; + + offset += data->loc.length; + prev_data->next = data; + prev_data = data; /* linked before check_layout(), so cleanup still hands it to state->data on a layout error */ + + if ((ei = check_layout(overlap_table, size, &data->loc, line, origin))) + goto cleanup; + } + } + else { + if ((ei = parse_instruction(line, &inst, offset))) + goto cleanup; + + offset += inst->loc.length; + prev_inst->next = inst; + prev_inst = inst; /* linked before check_layout(), so cleanup still hands it to state->instructions on a layout error */ + + if ((ei = check_layout(overlap_table, size, &inst->loc, line, origin))) + goto cleanup; + } + line = line->next; + } + + cleanup: /* reached on success (ei == NULL) and on every error path */ + state->instructions = dummy_inst.next; + state->data = dummy_data.next; + free(overlap_table); + return ei; +} diff --git a/src/assembler/tokenizer.h b/src/assembler/tokenizer.h new file mode 100644 index 0000000..3af75f2 --- /dev/null +++ b/src/assembler/tokenizer.h @@ -0,0 +1,11 @@ +/* Copyright (C) 2014-2015 Ben Kurtovic + Released under the terms of the MIT License. See LICENSE for details. 
*/ + +#pragma once + +#include "state.h" +#include "errors.h" + +/* Functions */ + +ErrorInfo* tokenize(AssemblerState*); diff --git a/src/disassembler.c b/src/disassembler.c index c75561b..0575be1 100644 --- a/src/disassembler.c +++ b/src/disassembler.c @@ -12,5 +12,8 @@ */ bool disassemble_file(const char *src_path, const char *dst_path) { + // TODO: not implemented yet; both paths are ignored (the void casts only mark them as used) and true is always returned + (void) src_path; + (void) dst_path; return true; }