From da537ff6168ab10720704bdedf05f80fd88ecd90 Mon Sep 17 00:00:00 2001 From: Ben Kurtovic Date: Mon, 20 Apr 2015 19:34:18 -0500 Subject: [PATCH] Phase one of tokenizer and data directives. --- src/assembler.c | 97 ++++++++++++++++++++++++++++++++++++-------- src/assembler/directives.h | 16 +++++++- src/assembler/errors.c | 29 +++++++++++++ src/assembler/errors.h | 35 +++++----------- src/assembler/preprocessor.c | 9 ++-- src/assembler/state.c | 15 +++++++ src/assembler/state.h | 4 ++ src/rom.h | 3 ++ 8 files changed, 162 insertions(+), 46 deletions(-) diff --git a/src/assembler.c b/src/assembler.c index 15bcb80..03a8006 100644 --- a/src/assembler.c +++ b/src/assembler.c @@ -4,14 +4,32 @@ #include #include "assembler.h" +#include "assembler/directives.h" #include "assembler/errors.h" #include "assembler/io.h" +#include "assembler/parse_util.h" #include "assembler/preprocessor.h" #include "assembler/state.h" #include "logging.h" #include "rom.h" #include "util.h" +#define IS_LABEL(line) ((line)->length && (line)->data[(line)->length - 1] == ':') + +/* + Parse an instruction encoded in line into an ASMInstruction object. + + On success, return NULL and store the instruction in *inst_ptr. On failure, + return an ErrorInfo object; *inst_ptr is not modified. +*/ +static ErrorInfo* parse_instruction( + const ASMLine *line, ASMInstruction **inst_ptr, size_t offset) +{ + // TODO + + return error_info_create(line, ET_PARSER, ED_PARSE_SYNTAX); +} + /* Tokenize ASMLines into ASMInstructions. @@ -21,25 +39,68 @@ */ static ErrorInfo* tokenize(AssemblerState *state) { - // TODO + size_t size = state->rom_size ? 
state->rom_size : ROM_SIZE_MAX; + const ASMLine **overlap_table = calloc(size, sizeof(const ASMLine*)); + if (!overlap_table) + OUT_OF_MEMORY() - // verify no instructions clash with header offset - // if rom size is set, verify nothing overflows - // otherwise, check nothing overflows max rom size (1 MB) + // TODO: fill overlap table for header with pointers to a dummy object + + ErrorInfo *ei = NULL; + ASMInstruction dummy = {.next = NULL}, *inst, *prev = &dummy; + const ASMLine *line = state->lines, *origin = NULL; + size_t offset = 0; + + while (line) { + if (IS_LOCAL_DIRECTIVE(line)) { + if (!IS_DIRECTIVE(line, DIR_ORIGIN)) { + // TODO + ei = error_info_create(line, ET_PREPROC, ED_PP_UNKNOWN); + goto error; + } + + if (!DIRECTIVE_HAS_ARG(line, DIR_ORIGIN)) { + ei = error_info_create(line, ET_PREPROC, ED_PP_NO_ARG); + goto error; + } + + uint32_t arg; + if (!parse_uint32_t(&arg, line, DIR_ORIGIN)) { + ei = error_info_create(line, ET_PREPROC, ED_PP_BAD_ARG); + goto error; + } + + offset = arg; + origin = line; + } + else if (IS_LABEL(line)) { + // TODO: add to symbol table + } + else { + if ((ei = parse_instruction(line, &inst, offset))) + goto error; + + // TODO: bounded check on range [offset, offset + inst->length) against overlap table + // if clash, use error with current line, + // then table line (if not header), + // then origin line (if non-null) + + offset += inst->length; + prev->next = inst; + prev = inst; + } + line = line->next; + } - (void) state; + state->instructions = dummy.next; + goto cleanup; -#ifdef DEBUG_MODE - DEBUG("Dumping ASMLines:") - const ASMLine *temp = state->lines; - while (temp) { - DEBUG("- %-40.*s [%s:%02zu]", (int) temp->length, temp->data, - temp->filename, temp->original->lineno) - temp = temp->next; - } -#endif + error: + asm_instructions_free(dummy.next); - return NULL; + cleanup: + free(overlap_table); + return ei; } /* @@ -51,7 +112,7 @@ static ErrorInfo* tokenize(AssemblerState *state) static ErrorInfo* 
resolve_defaults(AssemblerState *state) { if (!state->rom_size) { - state->rom_size = 32 << 10; + state->rom_size = ROM_SIZE_MIN; // TODO: use highest instruction too @@ -121,6 +182,10 @@ size_t assemble(const LineBuffer *source, uint8_t **binary_ptr, ErrorInfo **ei_p asm_symtable_init(&state.symtable); +#ifdef DEBUG_MODE + asm_lines_print(state.lines); +#endif + if ((error_info = tokenize(&state))) goto error; diff --git a/src/assembler/directives.h b/src/assembler/directives.h index f43cb51..10948f3 100644 --- a/src/assembler/directives.h +++ b/src/assembler/directives.h @@ -6,10 +6,10 @@ #include #define DIRECTIVE_MARKER '.' -#define NUM_DIRECTIVES 10 +#define NUM_DIRECTIVES 15 #define DIR_INCLUDE ".include" -#define DIR_ORIGIN ".org" + #define DIR_OPTIMIZER ".optimizer" #define DIR_ROM_SIZE ".rom_size" #define DIR_ROM_HEADER ".rom_header" @@ -19,6 +19,13 @@ #define DIR_ROM_REGION ".rom_region" #define DIR_ROM_DECLSIZE ".rom_declsize" +#define DIR_ORIGIN ".org" +#define DIR_ALIGN ".align" +#define DIR_BYTE ".byte" +#define DIR_ASCII ".ascii" +#define DIR_ASCIZ ".asciz" +#define DIR_ASCIIZ ".asciiz" + #define DIRECTIVE_HAS_ARG(line, d) ((line)->length > strlen(d)) #define IS_DIRECTIVE(line, d) \ @@ -26,6 +33,11 @@ !strncmp((line)->data, d, strlen(d)) && \ (!DIRECTIVE_HAS_ARG(line, d) || (line)->data[strlen(d)] == ' ')) +#define IS_LOCAL_DIRECTIVE(line) \ + (IS_DIRECTIVE(line, DIR_ORIGIN) || IS_DIRECTIVE(line, DIR_ALIGN) || \ + IS_DIRECTIVE(line, DIR_BYTE) || IS_DIRECTIVE(line, DIR_ASCII) || \ + IS_DIRECTIVE(line, DIR_ASCIZ) || IS_DIRECTIVE(line, DIR_ASCIIZ)) + #define DIRECTIVE_OFFSET(line, d) \ (DIRECTIVE_HAS_ARG(line, d) ? 
strlen(d) : 0) diff --git a/src/assembler/errors.c b/src/assembler/errors.c index fe89515..172e558 100644 --- a/src/assembler/errors.c +++ b/src/assembler/errors.c @@ -11,6 +11,35 @@ #define ERROR_TYPE(err_info) (asm_error_types[err_info->type]) #define ERROR_DESC(err_info) (asm_error_descs[err_info->desc]) +/* Error strings */ + +static const char *asm_error_types[] = { + "include directive", // ET_INCLUDE + "preprocessor", // ET_PREPROC + "memory layout", // ET_LAYOUT + "instruction parser" // ET_PARSER +}; + +static const char *asm_error_descs[] = { + "missing or invalid argument", // ED_INC_BAD_ARG + "infinite recursion detected", // ED_INC_RECURSION + "couldn't read included file", // ED_INC_FILE_READ + + "unknown directive", // ED_PP_UNKNOWN + "multiple values for directive", // ED_PP_DUPLICATE + "missing argument for directive", // ED_PP_NO_ARG + "invalid argument for directive", // ED_PP_BAD_ARG + "directive argument out of range", // ED_PP_ARG_RANGE + + "header offset exceeds given ROM size", // ED_LYT_HEADER_RANGE + "declared ROM size in header exceeds actual size", // ED_LYT_DECLARE_RANGE + "location overlaps with ROM header", // ED_LYT_HEAD_OVERLAP + "location overlaps with previous instruction", // ED_LYT_INST_OVERLAP + "location overlaps with previous data", // ED_LYT_DATA_OVERLAP + + "syntax error" // ED_PARSE_SYNTAX +}; + /* Internal structs */ struct ASMErrorLine { diff --git a/src/assembler/errors.h b/src/assembler/errors.h index 1045df6..f912590 100644 --- a/src/assembler/errors.h +++ b/src/assembler/errors.h @@ -11,7 +11,9 @@ typedef enum { ET_INCLUDE, - ET_PREPROC + ET_PREPROC, + ET_LAYOUT, + ET_PARSER } ASMErrorType; typedef enum { @@ -24,30 +26,15 @@ typedef enum { ED_PP_NO_ARG, ED_PP_BAD_ARG, ED_PP_ARG_RANGE, - ED_PP_HEADER_RANGE, - ED_PP_DECLARE_RANGE -} ASMErrorDesc; -/* Strings */ - -static const char *asm_error_types[] = { - "include directive", - "preprocessor" -}; - -static const char *asm_error_descs[] = { - "missing or invalid 
argument", - "infinite recursion detected", - "couldn't read included file", - - "unknown directive", - "multiple values for directive", - "missing argument for directive", - "invalid argument for directive", - "directive argument out of range", - "header offset exceeds given ROM size", - "declared ROM size in header exceeds actual size" -}; + ED_LYT_HEADER_RANGE, + ED_LYT_DECLARE_RANGE, + ED_LYT_HEAD_OVERLAP, + ED_LYT_INST_OVERLAP, + ED_LYT_DATA_OVERLAP, + + ED_PARSE_SYNTAX +} ASMErrorDesc; /* Structs */ diff --git a/src/assembler/preprocessor.c b/src/assembler/preprocessor.c index ab06ee4..655ac1d 100644 --- a/src/assembler/preprocessor.c +++ b/src/assembler/preprocessor.c @@ -359,8 +359,8 @@ ErrorInfo* preprocess(AssemblerState *state, const LineBuffer *source) next = line->next; if (line->data[0] != DIRECTIVE_MARKER) continue; - if (IS_DIRECTIVE(line, DIR_ORIGIN)) - continue; // Origins are handled by tokenizer + if (IS_LOCAL_DIRECTIVE(line)) + continue; // "Local" directives are handled by the tokenizer DEBUG("- handling directive: %.*s", (int) line->length, line->data) @@ -426,13 +426,14 @@ ErrorInfo* preprocess(AssemblerState *state, const LineBuffer *source) } if (rom_size_line && state->header.offset + HEADER_SIZE > state->rom_size) { - ei = error_info_create(rom_size_line, ET_PREPROC, ED_PP_HEADER_RANGE); + // TODO: maybe should force offset to be explicit, otherwise autofix + ei = error_info_create(rom_size_line, ET_LAYOUT, ED_LYT_HEADER_RANGE); goto cleanup; } if (rom_size_line && rom_declsize_line && size_code_to_bytes(state->header.rom_size) > state->rom_size) { - ei = error_info_create(rom_size_line, ET_PREPROC, ED_PP_DECLARE_RANGE); + ei = error_info_create(rom_size_line, ET_LAYOUT, ED_LYT_DECLARE_RANGE); error_info_append(ei, rom_declsize_line); goto cleanup; } diff --git a/src/assembler/state.c b/src/assembler/state.c index a62e97f..3ddf27e 100644 --- a/src/assembler/state.c +++ b/src/assembler/state.c @@ -101,3 +101,18 @@ void 
asm_symtable_free(ASMSymbolTable *symtable) } free(symtable); } + +#ifdef DEBUG_MODE +/* + DEBUG FUNCTION: Print out an ASMLine list to stdout. +*/ +void asm_lines_print(const ASMLine *line) +{ + DEBUG("Dumping ASMLines:") + while (line) { + DEBUG("- %-40.*s [%s:%02zu]", (int) line->length, line->data, + line->filename, line->original->lineno) + line = line->next; + } +} +#endif diff --git a/src/assembler/state.h b/src/assembler/state.h index de0c2c5..1b4edaf 100644 --- a/src/assembler/state.h +++ b/src/assembler/state.h @@ -80,3 +80,7 @@ void asm_lines_free(ASMLine*); void asm_includes_free(ASMInclude*); void asm_instructions_free(ASMInstruction*); void asm_symtable_free(ASMSymbolTable*); + +#ifdef DEBUG_MODE +void asm_lines_print(const ASMLine*); +#endif diff --git a/src/rom.h b/src/rom.h index 8dde1e4..4e4465d 100644 --- a/src/rom.h +++ b/src/rom.h @@ -6,6 +6,9 @@ #include #include +#define ROM_SIZE_MIN (32 << 10) // 32 KB +#define ROM_SIZE_MAX ( 1 << 20) // 1 MB + #define HEADER_SIZE 16 /* Error strings */