From 2b300a2da1fedaf02f36f149ed33f935f3d912bd Mon Sep 17 00:00:00 2001 From: Ben Kurtovic Date: Thu, 30 Apr 2015 00:36:56 -0500 Subject: [PATCH] Properly lower-case labels; ensure labels are not registers. --- src/assembler/errors.c | 4 ++- src/assembler/errors.h | 1 + src/assembler/inst_args.h | 59 +++++++++++++++++++++++++++++++++++++++++++ src/assembler/instructions.c | 8 +++--- src/assembler/parse_util.c | 60 +++++++++++++++++++++++++++++++++++++++++--- src/assembler/parse_util.h | 2 ++ src/assembler/preprocessor.c | 15 ++++++++++- src/assembler/tokenizer.c | 12 ++++++--- 8 files changed, 149 insertions(+), 12 deletions(-) create mode 100644 src/assembler/inst_args.h diff --git a/src/assembler/errors.c b/src/assembler/errors.c index b74d519..c102152 100644 --- a/src/assembler/errors.c +++ b/src/assembler/errors.c @@ -42,6 +42,7 @@ static const char *error_descs[] = { [ED_SYM_DUPE_LABELS] = "duplicate definitions for label", [ED_SYM_NO_LABEL] = "undefined reference to label", + [ED_SYM_IS_REGISTER] = "labels cannot share names with registers", [ED_PS_OP_TOO_LONG] = "opcode mnemonic is too long (2-4 characters)", [ED_PS_OP_TOO_SHORT] = "opcode mnemonic is too short (2-4 characters)", @@ -49,7 +50,8 @@ static const char *error_descs[] = { [ED_PS_OP_UNKNOWN] = "unknown opcode mnemonic", [ED_PS_TOO_FEW_ARGS] = "too few arguments for opcode", [ED_PS_TOO_MANY_ARGS] = "too many arguments for opcode", - // [ED_PS_ARG1_TYPE] = "invalid type for first argument", + // [ED_PS_ARG1_NEED_REG] = "invalid type for first argument (needs register)", + // [ED_PS_ARG1_BAD_REG] = "first argument should be a register", }; /* Internal structs */ diff --git a/src/assembler/errors.h b/src/assembler/errors.h index 4ed6da5..ff7808a 100644 --- a/src/assembler/errors.h +++ b/src/assembler/errors.h @@ -41,6 +41,7 @@ typedef enum { ED_SYM_DUPE_LABELS, ED_SYM_NO_LABEL, + ED_SYM_IS_REGISTER, ED_PS_OP_TOO_LONG, ED_PS_OP_TOO_SHORT, diff --git a/src/assembler/inst_args.h 
b/src/assembler/inst_args.h new file mode 100644 index 0000000..c24f859 --- /dev/null +++ b/src/assembler/inst_args.h @@ -0,0 +1,59 @@ +/* Copyright (C) 2014-2015 Ben Kurtovic + Released under the terms of the MIT License. See LICENSE for details. */ + +#pragma once + +#include <stdbool.h> +#include <stdint.h> + +typedef enum { + AT_REGISTER, + AT_IMMEDIATE, + AT_INDIRECT, + AT_INDEXED, + AT_LABEL, + AT_CONDITION +} ASMArgType; + +typedef enum { + REG_A, REG_F, REG_B, REG_C, REG_D, REG_E, REG_H, REG_L, REG_I, REG_R, + REG_AF, REG_BC, REG_DE, REG_HL, REG_IX, REG_IY, + REG_PC, REG_SP, + REG_AF_, REG_IXH, REG_IXL, REG_IYH, REG_IYL +} ASMArgRegister; + +typedef struct { + uint16_t value; + bool is_negative; +} ASMArgImmediate; + +typedef struct { + bool is_reg; + ASMArgRegister reg; + ASMArgImmediate imm; +} ASMArgIndirect; + +typedef struct { + ASMArgRegister reg; + ASMArgImmediate imm; +} ASMArgIndexed; + +typedef char* ASMArgLabel; + +typedef enum { + COND_NZ, COND_N, COND_NC, COND_C, COND_PO, COND_PE, COND_P, COND_M +} ASMArgCondition; + +typedef union { + ASMArgRegister reg; + ASMArgImmediate imm; + ASMArgIndirect indirect; + ASMArgIndexed index; + ASMArgLabel label; + ASMArgCondition cond; +} ASMArgData; + +typedef struct { + ASMArgType type; + ASMArgData data; +} ASMInstArg; diff --git a/src/assembler/instructions.c b/src/assembler/instructions.c index c7f2179..3c2dd3c 100644 --- a/src/assembler/instructions.c +++ b/src/assembler/instructions.c @@ -29,11 +29,11 @@ #define JOIN_(a, b, c, d) ((uint32_t) ((a << 24) + (b << 16) + (c << 8) + d)) #define DISPATCH_(s, z) ( \ - z == 2 ? JOIN_(s[0], s[1], 0x00, 0x00) : \ - z == 3 ? JOIN_(s[0], s[1], s[2], 0x00) : \ - JOIN_(s[0], s[1], s[2], s[3])) \ + (z) == 2 ? JOIN_(s[0], s[1], 0x00, 0x00) : \ + (z) == 3 ? 
JOIN_(s[0], s[1], s[2], 0x00) : \ + JOIN_(s[0], s[1], s[2], s[3])) \ -#define MAKE_CMP_(s) DISPATCH_(s, (sizeof(s) / sizeof(char) - 1)) +#define MAKE_CMP_(s) DISPATCH_(s, sizeof(s) / sizeof(char) - 1) #define HANDLE(m) if (key == MAKE_CMP_(#m)) return parse_inst_##m; diff --git a/src/assembler/parse_util.c b/src/assembler/parse_util.c index 3cbc81d..f5d15af 100644 --- a/src/assembler/parse_util.c +++ b/src/assembler/parse_util.c @@ -27,9 +27,6 @@ */ bool parse_bool(bool *result, const char *arg, ssize_t size) { - if (size <= 0 || size > 5) - return false; - switch (size) { case 1: // 0, 1 if (*arg == '0' || *arg == '1') @@ -176,6 +173,63 @@ bool parse_bytes(uint8_t **result, size_t *length, const char *arg, ssize_t size } /* + Read in a register argument and store it in *result. +*/ +bool parse_register(ASMArgRegister *result, const char *arg, ssize_t size) +{ + if (size < 1 || size > 3) + return false; + +#define LCASE(c) ((c >= 'A' && c <= 'Z') ? (c + 'a' - 'A') : c) + char buf[3] = {'\0'}; + switch (size) { + case 3: buf[2] = LCASE(arg[2]); + case 2: buf[1] = LCASE(arg[1]); + case 1: buf[0] = LCASE(arg[0]); + } +#undef LCASE + + switch (size) { + case 1: + switch (buf[0]) { + case 'a': return (*result = REG_A), true; + case 'f': return (*result = REG_F), true; + case 'b': return (*result = REG_B), true; + case 'c': return (*result = REG_C), true; + case 'd': return (*result = REG_D), true; + case 'e': return (*result = REG_E), true; + case 'h': return (*result = REG_H), true; + case 'l': return (*result = REG_L), true; + case 'i': return (*result = REG_I), true; + case 'r': return (*result = REG_R), true; + } + return false; + case 2: + switch ((buf[0] << 8) + buf[1]) { + case 0x6166: return (*result = REG_AF), true; + case 0x6263: return (*result = REG_BC), true; + case 0x6465: return (*result = REG_DE), true; + case 0x686c: return (*result = REG_HL), true; + case 0x6978: return (*result = REG_IX), true; + case 0x6979: return (*result = REG_IY), true; + case 
0x7063: return (*result = REG_PC), true; + case 0x7370: return (*result = REG_SP), true; + } + return false; + case 3: + switch ((buf[0] << 16) + (buf[1] << 8) + buf[2]) { + case 0x616627: return (*result = REG_AF_), true; + case 0x697868: return (*result = REG_IXH), true; + case 0x69786c: return (*result = REG_IXL), true; + case 0x697968: return (*result = REG_IYH), true; + case 0x69796c: return (*result = REG_IYL), true; + } + return false; + } + return false; +} + +/* Read in a boolean argument from the given line and store it in *result. */ DIRECTIVE_PARSE_FUNC(bool, bool) diff --git a/src/assembler/parse_util.h b/src/assembler/parse_util.h index d9311f2..011f517 100644 --- a/src/assembler/parse_util.h +++ b/src/assembler/parse_util.h @@ -6,6 +6,7 @@ #include <stdbool.h> #include <stdint.h> +#include "inst_args.h" #include "state.h" #define dparse__Bool dparse_bool @@ -16,6 +17,7 @@ bool parse_bool(bool*, const char*, ssize_t); bool parse_uint32_t(uint32_t*, const char*, ssize_t); bool parse_string(char**, size_t*, const char*, ssize_t); bool parse_bytes(uint8_t**, size_t*, const char*, ssize_t); +bool parse_register(ASMArgRegister*, const char*, ssize_t); bool dparse_bool(bool*, const ASMLine*, const char*); bool dparse_uint32_t(uint32_t*, const ASMLine*, const char*); diff --git a/src/assembler/preprocessor.c b/src/assembler/preprocessor.c index 0fc5766..7a482b7 100644 --- a/src/assembler/preprocessor.c +++ b/src/assembler/preprocessor.c @@ -94,6 +94,19 @@ static inline bool is_valid_label_char(char c, bool first) } /* + Functions similarly to memcpy, but lowercases the characters along the way. +*/ +static void memcpy_lc(char *restrict dst, const char *restrict src, size_t n) +{ + while (n-- > 0) { + char c = *(src++); + if (c >= 'A' && c <= 'Z') + c += 'a' - 'A'; + *(dst++) = c; + } +} + +/* Preprocess a single source line for labels. Return the index of first non-whitespace non-label character. 
*head_ptr is @@ -125,7 +138,7 @@ static size_t read_labels( if (!line->data) OUT_OF_MEMORY() - strncpy(line->data, source + start, i - start + 1); + memcpy_lc(line->data, source + start, i - start + 1); line->length = i - start + 1; line->is_label = true; diff --git a/src/assembler/tokenizer.c b/src/assembler/tokenizer.c index a21188b..87cd8d7 100644 --- a/src/assembler/tokenizer.c +++ b/src/assembler/tokenizer.c @@ -7,6 +7,7 @@ #include "tokenizer.h" #include "directives.h" #include "instructions.h" +#include "inst_args.h" #include "parse_util.h" #include "../logging.h" #include "../mmu.h" @@ -55,12 +56,16 @@ static inline int8_t default_bank_slot(uint8_t bank) /* Add a given line, representing a label, to the symbol table. - Return NULL on success and an ErrorInfo object on failure (in the case of - duplicate labels). + Return NULL on success and an ErrorInfo object on failure (e.g. in the case + of duplicate labels). */ static ErrorInfo* add_label_to_table( ASMSymbolTable *symtable, const ASMLine *line, size_t offset, int8_t slot) { + ASMArgRegister reg; + if (parse_register(&reg, line->data, line->length - 1)) + return error_info_create(line, ET_SYMBOL, ED_SYM_IS_REGISTER); + char *symbol = strndup(line->data, line->length - 1); if (!symbol) OUT_OF_MEMORY() @@ -69,6 +74,7 @@ static ErrorInfo* add_label_to_table( if (current) { ErrorInfo *ei = error_info_create(line, ET_SYMBOL, ED_SYM_DUPE_LABELS); error_info_append(ei, current->line); + free(symbol); return ei; } @@ -249,7 +255,7 @@ static ErrorInfo* parse_instruction( uint8_t *bytes; size_t arglen = line->length - i, length; - char *argstart = arglen > 0 ? line->data + i : NULL, *symbol; + char *argstart = arglen > 0 ? line->data + i : NULL, *symbol = NULL; ASMInstParser parser = get_inst_parser(mnemonic); if (!parser)