diff --git a/src/assembler/errors.c b/src/assembler/errors.c
index c102152..525798b 100644
--- a/src/assembler/errors.c
+++ b/src/assembler/errors.c
@@ -50,8 +50,9 @@ static const char *error_descs[] = {
     [ED_PS_OP_UNKNOWN] = "unknown opcode mnemonic",
     [ED_PS_TOO_FEW_ARGS] = "too few arguments for opcode",
     [ED_PS_TOO_MANY_ARGS] = "too many arguments for opcode",
-    // [ED_PS_ARG1_NEED_REG] = "invalid type for first argument (needs register)",
-    // [ED_PS_ARG1_BAD_REG] = "first argument should be a register",
+    [ED_PS_ARG_SYNTAX] = "invalid syntax in argument(s)",
+    [ED_PS_ARG0_TYPE] = "invalid type for first argument",
+    [ED_PS_ARG0_BAD_REG] = "unsupported register in first argument"
 };
 
 /* Internal structs */
diff --git a/src/assembler/errors.h b/src/assembler/errors.h
index ff7808a..12eff48 100644
--- a/src/assembler/errors.h
+++ b/src/assembler/errors.h
@@ -48,7 +48,10 @@ typedef enum {
     ED_PS_OP_INVALID,
     ED_PS_OP_UNKNOWN,
     ED_PS_TOO_FEW_ARGS,
-    ED_PS_TOO_MANY_ARGS
+    ED_PS_TOO_MANY_ARGS,
+    ED_PS_ARG_SYNTAX,
+    ED_PS_ARG0_TYPE,
+    ED_PS_ARG0_BAD_REG
 } ASMErrorDesc;
 
 /* Structs */
diff --git a/src/assembler/inst_args.h b/src/assembler/inst_args.h
index c24f859..8e43585 100644
--- a/src/assembler/inst_args.h
+++ b/src/assembler/inst_args.h
@@ -22,20 +22,32 @@ typedef enum {
     REG_AF_, REG_IXH, REG_IXL, REG_IYH, REG_IYL
 } ASMArgRegister;
 
+typedef enum {
+    IMM_U16 = 0x01,  // unsigned 16-bit: [0, 65535]
+    IMM_U8  = 0x02,  // unsigned 8-bit: [0, 255]
+    IMM_S8  = 0x04,  // signed 8-bit: [-128, 127]
+    IMM_REL = 0x08,  // relative offset: [-126, 129]
+    IMM_BIT = 0x10,  // bit index: [0, 7]
+    IMM_RST = 0x20,  // RST page 0 addr: {0x00, 0x08, 0x10, 0x18, ..., 0x38}
+    IMM_IM  = 0x40   // interrupt mode: [0, 2]
+} ASMArgImmType;
+
 typedef struct {
+    ASMArgImmType mask;
     uint16_t value;
-    bool is_negative;
 } ASMArgImmediate;
 
 typedef struct {
-    bool is_reg;
-    ASMArgRegister reg;
-    ASMArgImmediate imm;
+    ASMArgType type;
+    union {
+        ASMArgRegister reg;
+        ASMArgImmediate imm;
+    } addr;
 } ASMArgIndirect;
 
 typedef struct {
     ASMArgRegister reg;
-    ASMArgImmediate imm;
+    int8_t offset;
 } ASMArgIndexed;
 
 typedef char* ASMArgLabel;
@@ -44,16 +56,14 @@ typedef enum {
     COND_NZ, COND_N, COND_NC, COND_C, COND_PO, COND_PE, COND_P, COND_M
 } ASMArgCondition;
 
-typedef union {
-    ASMArgRegister reg;
-    ASMArgImmediate imm;
-    ASMArgIndirect indirect;
-    ASMArgIndexed index;
-    ASMArgLabel label;
-    ASMArgCondition cond;
-} ASMArgData;
-
 typedef struct {
     ASMArgType type;
-    ASMArgData data;
+    union {
+        ASMArgRegister reg;
+        ASMArgImmediate imm;
+        ASMArgIndirect indirect;
+        ASMArgIndexed index;
+        ASMArgLabel label;
+        ASMArgCondition cond;
+    } data;
 } ASMInstArg;
diff --git a/src/assembler/instructions.c b/src/assembler/instructions.c
index 3c2dd3c..f2f86bb 100644
--- a/src/assembler/instructions.c
+++ b/src/assembler/instructions.c
@@ -1,7 +1,11 @@
 /* Copyright (C) 2014-2015 Ben Kurtovic
    Released under the terms of the MIT License. See LICENSE for details. */
 
+#include <stdarg.h>
+#include <stdlib.h>
+
 #include "instructions.h"
+#include "inst_args.h"
 #include "../logging.h"
 
 /*
@@ -11,17 +15,17 @@
     - http://www.z80.info/z80code.txt
     - http://www.z80.info/z80href.txt
 
-    instruction := mnemonic [arg[, arg[, arg]]]
-    mnemonic    := [a-z0-9]{2-4}
-    arg         := register | immediate | label | indirect | indexed | condition | page0
+    inst      := mnemonic [arg[, arg[, arg]]]
+    mnemonic  := [a-z0-9]{2-4}
+    arg       := register | immediate | indirect | indexed | label | condition
 
-    register    := A | B | C | D | E | AF | BC | DE | HL | F | I | IX | IY | PC | R | SP
-    immediate   := 8-bit integer | 16-bit integer
-    label       := string
+    register  := A | B | C | D | E | AF | BC | DE | HL | H | L | F | I | IX |
+                 IY | PC | R | SP | AF' | IXH | IXL | IYH | IYL
+    immediate := 16-bit integer
     indirect  := \( (register | immediate) \)
     indexed   := \( (IX | IY) + immediate \)
+    label     := string
     condition := NZ | N | NC | C | PO | PE | P | M
-    page0       := $0 | $8 | $10 | $18 | $20 | $28 | $30 | $38
 */
 
 /* Helper macros for get_inst_parser() */
@@ -37,24 +41,207 @@
 
 #define HANDLE(m) if (key == MAKE_CMP_(#m)) return parse_inst_##m;
 
-/* Instruction parser functions */
+/* Internal helper macros for instruction parsers */
+
+#define INST_ALLOC_(len) \
+    *length = len; \
+    if (!(*bytes = malloc(sizeof(uint8_t) * (len)))) \
+        OUT_OF_MEMORY()
+
+#define INST_SET_(b, val) ((*bytes)[b] = val)
+#define INST_SET1_(b1) INST_SET_(0, b1)
+#define INST_SET2_(b1, b2) INST_SET1_(b1), INST_SET_(1, b2)
+#define INST_SET3_(b1, b2, b3) INST_SET2_(b1, b2), INST_SET_(2, b3)
+#define INST_SET4_(b1, b2, b3, b4) INST_SET3_(b1, b2, b3), INST_SET_(3, b4)
+
+#define INST_DISPATCH_(a, b, c, d, target, ...) target
+
+#define INST_FILL_BYTES_(len, ...) \
+    ((len > 4) ? fill_bytes_variadic(*bytes, len, __VA_ARGS__) : \
+    INST_DISPATCH_(__VA_ARGS__, INST_SET4_, INST_SET3_, INST_SET2_, \
+                   INST_SET1_, __VA_ARGS__)(__VA_ARGS__));
+
+#define INST_PREFIX_(reg) \
+    (((reg) == REG_IX || (reg) == REG_IXH || (reg) == REG_IXL) ? 0xDD : 0xFD)
+
+/* Helper macros for instruction parsers */
 
 #define INST_FUNC(mnemonic) \
 static ASMErrorDesc parse_inst_##mnemonic( \
     uint8_t **bytes, size_t *length, char **symbol, const char *arg, size_t size)
 
+#define INST_ERROR(desc) return ED_PS_##desc;
+
+#define INST_TAKES_NO_ARGS \
+    if (arg) \
+        INST_ERROR(TOO_MANY_ARGS) \
+    (void) size;
+
+#define INST_TAKES_ARGS(lo, hi) \
+    if (!arg) \
+        INST_ERROR(TOO_FEW_ARGS) \
+    ASMInstArg args[3]; \
+    size_t nargs; \
+    ASMErrorDesc err = parse_args(args, &nargs, arg, size); \
+    if (err) \
+        return err; \
+    if (nargs < lo) \
+        INST_ERROR(TOO_FEW_ARGS) \
+    if (nargs > hi) \
+        INST_ERROR(TOO_MANY_ARGS)
+
+#define INST_ARG(n) (args[n])
+
+#define INST_REG_PREFIX(n) INST_PREFIX_(INST_ARG(n).data.reg)
+
+#define INST_INDEX_PREFIX(n) INST_PREFIX_(INST_ARG(n).data.index.reg)
+
+#define INST_IND_PREFIX(n) INST_PREFIX_(INST_ARG(n).data.indirect.addr.reg)
+
+#define INST_RETURN(len, ...) { \
+        (void) symbol; \
+        INST_ALLOC_(len) \
+        INST_FILL_BYTES_(len, __VA_ARGS__) \
+        return ED_NONE; \
+    }
+
+#define INST_RETURN_WITH_SYMBOL(len, label, ...) { \
+        *symbol = strdup(label); \
+        if (!(*symbol)) \
+            OUT_OF_MEMORY() \
+        INST_ALLOC_(len) \
+        INST_FILL_BYTES_(len - 2, __VA_ARGS__) \
+        return ED_NONE; \
+    }
+
+/*
+    Fill an instruction's byte array with the given data.
+
+    This internal function is only called for instructions longer than four
+    bytes (of which there is only one: the fake emulator debugging/testing
+    opcode with mnemonic "emu"), so it does not get used in normal situations.
+
+    Return the value of the last byte inserted, for compatibility with the
+    INST_SETn_ family of macros.
+*/
+static uint8_t fill_bytes_variadic(uint8_t *bytes, size_t len, ...)
+{
+    va_list vargs;
+    va_start(vargs, len);
+    for (size_t i = 0; i < len; i++)
+        bytes[i] = va_arg(vargs, unsigned);
+    va_end(vargs);
+    return bytes[len - 1];
+}
+
+/*
+    Parse a single instruction argument into an ASMInstArg object.
+
+    Return ED_NONE (0) on success or an error code on failure.
+*/
+static ASMErrorDesc parse_arg(
+    ASMInstArg *arg, const char *str, size_t size, char **symbol)
+{
+    // TODO
+    DEBUG("parse_arg(): -->%.*s<--", (int) size, str)
+    return ED_PS_ARG_SYNTAX;
+}
+
+/*
+    Parse an argument string into ASMInstArg objects.
+
+    Arguments are separated by commas, each optionally followed by one space.
+    On success, *nargs holds the number of arguments stored in args (at most
+    three).
+
+    Return ED_NONE (0) on success or an error code on failure.
+*/
+static ASMErrorDesc parse_args(
+    ASMInstArg args[3], size_t *nargs, const char *str, size_t size)
+{
+    ASMErrorDesc err;
+    static char *symbol = NULL;
+    size_t start = 0, i = 0;
+
+    *nargs = 0;  // INST_TAKES_ARGS passes an uninitialized counter
+    while (i < size) {
+        char c = str[i];
+        if (c == ',') {
+            if ((err = parse_arg(&args[*nargs], str + start, i - start, &symbol)))
+                return err;
+            (*nargs)++;
+
+            i++;
+            if (i < size && str[i] == ' ')
+                i++;
+            start = i;
+            if (*nargs >= 3 && i < size)
+                return ED_PS_TOO_MANY_ARGS;
+        } else {
+            if ((c >= 'a' && c <= 'z') || (c >= '0' && c <= '9') ||
+                 c == ' ' || c == '+' || c == '-' || c == '(' || c == ')' ||
+                 c == '_' || c == '.')
+                i++;
+            else
+                return ED_PS_ARG_SYNTAX;
+        }
+    }
+
+    if (i > start) {
+        if ((err = parse_arg(&args[*nargs], str + start, i - start, &symbol)))
+            return err;
+        (*nargs)++;  // Count the final argument (no trailing comma)
+    }
+    return ED_NONE;
+}
+
+/* Instruction parser functions */
+
 INST_FUNC(nop)
 {
-    DEBUG("dispatched to -> NOP")
-    return ED_PS_TOO_FEW_ARGS;
+    INST_TAKES_NO_ARGS
+    INST_RETURN(1, 0x00)
 }
 
 INST_FUNC(inc)
 {
-    DEBUG("dispatched to -> INC")
-    return ED_PS_TOO_FEW_ARGS;
+    INST_TAKES_ARGS(1, 1)
+    switch (INST_ARG(0).type) {
+        case AT_REGISTER:
+            switch (INST_ARG(0).data.reg) {
+                case REG_A:   INST_RETURN(1, 0x3C)
+                case REG_B:   INST_RETURN(1, 0x04)
+                case REG_C:   INST_RETURN(1, 0x0C)
+                case REG_D:   INST_RETURN(1, 0x14)
+                case REG_E:   INST_RETURN(1, 0x1C)
+                case REG_H:   INST_RETURN(1, 0x24)
+                case REG_L:   INST_RETURN(1, 0x2C)
+                case REG_BC:  INST_RETURN(1, 0x03)
+                case REG_DE:  INST_RETURN(1, 0x13)
+                case REG_HL:  INST_RETURN(1, 0x23)
+                case REG_SP:  INST_RETURN(1, 0x33)
+                case REG_IX:  INST_RETURN(2, 0xDD, 0x23)
+                case REG_IY:  INST_RETURN(2, 0xFD, 0x23)
+                case REG_IXH: INST_RETURN(2, 0xDD, 0x24)  // prefix + inc h
+                case REG_IXL: INST_RETURN(2, 0xDD, 0x2C)  // prefix + inc l
+                case REG_IYH: INST_RETURN(2, 0xFD, 0x24)  // prefix + inc h
+                case REG_IYL: INST_RETURN(2, 0xFD, 0x2C)  // prefix + inc l
+                default: INST_ERROR(ARG0_BAD_REG)
+            }
+        case AT_INDIRECT:
+            if (INST_ARG(0).data.indirect.type != AT_REGISTER)
+                INST_ERROR(ARG0_TYPE)
+            if (INST_ARG(0).data.indirect.addr.reg != REG_HL)
+                INST_ERROR(ARG0_BAD_REG)
+            INST_RETURN(1, 0x34)  // inc (hl) is a single-byte opcode
+        case AT_INDEXED:
+            INST_RETURN(3, INST_INDEX_PREFIX(0), 0x34, INST_ARG(0).data.index.offset)
+        default:
+            INST_ERROR(ARG0_TYPE)
+    }
 }
 
+/*
 INST_FUNC(add)
 {
     DEBUG("dispatched to -> ADD")
@@ -66,6 +253,7 @@ INST_FUNC(adc)
     DEBUG("dispatched to -> ADC")
     return ED_PS_TOO_FEW_ARGS;
 }
+*/
 
 /*
     Return the relevant ASMInstParser function for a given mnemonic.
@@ -82,8 +270,8 @@ ASMInstParser get_inst_parser(char mstr[MAX_MNEMONIC_SIZE])
 
     HANDLE(nop)
     HANDLE(inc)
-    HANDLE(add)
-    HANDLE(adc)
+    // HANDLE(add)
+    // HANDLE(adc)
 
     return NULL;
 }
diff --git a/src/assembler/tokenizer.c b/src/assembler/tokenizer.c
index 87cd8d7..5b1208e 100644
--- a/src/assembler/tokenizer.c
+++ b/src/assembler/tokenizer.c
@@ -253,6 +253,9 @@ static ErrorInfo* parse_instruction(
     if (i < MIN_MNEMONIC_SIZE)
         return error_info_create(line, ET_PARSER, ED_PS_OP_TOO_SHORT);
 
+    if (i + 1 < line->length)
+        i++;  // Advance past space
+
     uint8_t *bytes;
     size_t arglen = line->length - i, length;
     char *argstart = arglen > 0 ? line->data + i : NULL, *symbol = NULL;