From 4f791eef59cde412061d0332db5db79e1f816d2e Mon Sep 17 00:00:00 2001 From: Ben Kurtovic Date: Tue, 28 Apr 2015 23:33:38 -0500 Subject: [PATCH] More on instruction parsing, including mnemonic handling. --- src/assembler/errors.c | 7 ++++++- src/assembler/errors.h | 7 ++++++- src/assembler/instructions.c | 36 ++++++++++++++++++++++++++++++++++++ src/assembler/instructions.h | 19 +++++++++++++++++++ src/assembler/parse_util.c | 2 +- src/assembler/tokenizer.c | 39 ++++++++++++++++++++++++++++++--------- 6 files changed, 98 insertions(+), 12 deletions(-) create mode 100644 src/assembler/instructions.c create mode 100644 src/assembler/instructions.h diff --git a/src/assembler/errors.c b/src/assembler/errors.c index 6555d26..da75ecc 100644 --- a/src/assembler/errors.c +++ b/src/assembler/errors.c @@ -19,6 +19,8 @@ static const char *error_types[] = { }; static const char *error_descs[] = { + [ED_NONE] = "undefined error", + [ED_INC_BAD_ARG] = "missing or invalid argument", [ED_INC_DEPTH] = "maximum include depth exceeded", [ED_INC_FILE_READ] = "couldn't read included file", @@ -41,7 +43,10 @@ static const char *error_descs[] = { [ED_SYM_DUPE_LABELS] = "duplicate definitions for label", [ED_SYM_NO_LABEL] = "undefined reference to label", - [ED_PARSE_SYNTAX] = "syntax error" + [ED_PARSE_OP_LONG] = "opcode mnemonic is too long (2-4 characters)", + [ED_PARSE_OP_SHORT] = "opcode mnemonic is too short (2-4 characters)", + [ED_PARSE_OP_CHARS] = "invalid characters in opcode mnemonic", + [ED_PARSE_OP_UNKNOWN] = "unknown opcode mnemonic" }; /* Internal structs */ diff --git a/src/assembler/errors.h b/src/assembler/errors.h index 8a8b246..100d745 100644 --- a/src/assembler/errors.h +++ b/src/assembler/errors.h @@ -18,6 +18,8 @@ typedef enum { } ASMErrorType; typedef enum { + ED_NONE = 0, + ED_INC_BAD_ARG, ED_INC_DEPTH, ED_INC_FILE_READ, @@ -40,7 +42,10 @@ typedef enum { ED_SYM_DUPE_LABELS, ED_SYM_NO_LABEL, - ED_PARSE_SYNTAX + ED_PARSE_OP_LONG, + ED_PARSE_OP_SHORT, + 
ED_PARSE_OP_CHARS, + ED_PARSE_OP_UNKNOWN } ASMErrorDesc; /* Structs */ diff --git a/src/assembler/instructions.c b/src/assembler/instructions.c new file mode 100644 index 0000000..36d849e --- /dev/null +++ b/src/assembler/instructions.c @@ -0,0 +1,36 @@ +/* Copyright (C) 2014-2015 Ben Kurtovic + Released under the terms of the MIT License. See LICENSE for details. */ + +#include "instructions.h" +#include "../logging.h" + +/* + TEMP SYNTAX NOTES: + - http://clrhome.org/table/ + - http://www.z80.info/z80undoc.htm + - http://www.z80.info/z80code.txt + - http://www.z80.info/z80href.txt + + instruction := mnemonic [arg[, arg[, arg]]] + mnemonic := [a-z0-9]{2,4} + arg := register | immediate | label | indirect | indexed | condition | page0 + + register := A | B | C | D | E | H | L | AF | BC | DE | HL | F | I | IX | IY | PC | R | SP + immediate := 8-bit integer | 16-bit integer + label := string + indirect := \( (register | immediate) \) + indexed := \( (IX | IY) + immediate \) + condition := NZ | Z | NC | C | PO | PE | P | M + page0 := $0 | $8 | $10 | $18 | $20 | $28 | $30 | $38 +*/ + +/* + Return the parser for the given mnemonic (stub: always returns NULL for now). +*/ +ASMInstParser get_inst_parser(char mnemonic[MAX_MNEMONIC_SIZE + 1]) +{ + // TODO + DEBUG("get_inst_parser(): -->%s<--", mnemonic) + + return NULL; +} diff --git a/src/assembler/instructions.h b/src/assembler/instructions.h new file mode 100644 index 0000000..9d38d17 --- /dev/null +++ b/src/assembler/instructions.h @@ -0,0 +1,19 @@ +/* Copyright (C) 2014-2015 Ben Kurtovic + Released under the terms of the MIT License. See LICENSE for details.
*/ + +#pragma once + +#include + +#include "errors.h" + +#define MIN_MNEMONIC_SIZE 2 +#define MAX_MNEMONIC_SIZE 4 + +/* Typedefs */ + +typedef ASMErrorDesc (*ASMInstParser)(uint8_t**, size_t*, char**, const char*, size_t); + +/* Functions */ + +ASMInstParser get_inst_parser(char[MAX_MNEMONIC_SIZE + 1]); diff --git a/src/assembler/parse_util.c b/src/assembler/parse_util.c index 33b0419..3cbc81d 100644 --- a/src/assembler/parse_util.c +++ b/src/assembler/parse_util.c @@ -72,7 +72,7 @@ bool parse_uint32_t(uint32_t *result, const char *arg, ssize_t size) while (arg < end) { if (*arg >= '0' && *arg <= '9') - value = value * 16 + (*arg - '0'); + value = (value * 0x10) + (*arg - '0'); else if (*arg >= 'a' && *arg <= 'f') value = (value * 0x10) + 0xA + (*arg - 'a'); else diff --git a/src/assembler/tokenizer.c b/src/assembler/tokenizer.c index 1da8552..9c70c41 100644 --- a/src/assembler/tokenizer.c +++ b/src/assembler/tokenizer.c @@ -6,6 +6,7 @@ #include "tokenizer.h" #include "directives.h" +#include "instructions.h" #include "parse_util.h" #include "../logging.h" #include "../mmu.h" @@ -230,23 +231,43 @@ static ErrorInfo* parse_data( static ErrorInfo* parse_instruction( const ASMLine *line, ASMInstruction **inst_ptr, size_t offset) { - // TODO - DEBUG("parse_instruction(): %.*s", (int) line->length, line->data) + char mnemonic[MAX_MNEMONIC_SIZE + 1]; + size_t i = 0; + while (i < line->length) { + char c = line->data[i]; + if (c == ' ') + break; + if (i >= MAX_MNEMONIC_SIZE) + return error_info_create(line, ET_PARSER, ED_PARSE_OP_LONG); + if ((c < 'a' || c > 'z') && (c < '0' || c > '9')) + return error_info_create(line, ET_PARSER, ED_PARSE_OP_CHARS); + mnemonic[i++] = c; + } + + if (i < MIN_MNEMONIC_SIZE) + return error_info_create(line, ET_PARSER, ED_PARSE_OP_SHORT); + + uint8_t *bytes; + size_t arglen = line->length - i, length; + char *argstart = arglen > 0 ? 
line->data + i : NULL, *symbol; - // SYNTAX NOTES: - // see http://clrhome.org/table/ and http://www.z80.info/z80undoc.htm + mnemonic[i] = '\0'; + ASMInstParser parser = get_inst_parser(mnemonic); + if (!parser) + return error_info_create(line, ET_PARSER, ED_PARSE_OP_UNKNOWN); - // return error_info_create(line, ET_PARSER, ED_PARSE_SYNTAX); + ASMErrorDesc edesc = parser(&bytes, &length, &symbol, argstart, arglen); + if (edesc) + return error_info_create(line, ET_PARSER, edesc); ASMInstruction *inst = malloc(sizeof(ASMInstruction)); if (!inst) OUT_OF_MEMORY() inst->loc.offset = offset; - inst->loc.length = 1; - uint8_t tmp = 0x3C; - inst->bytes = memcpy(malloc(1), &tmp, 1); - inst->symbol = NULL; + inst->loc.length = length; + inst->bytes = bytes; + inst->symbol = symbol; inst->line = line; inst->next = NULL;