Parcourir la source

More work on instruction parser.

master
Ben Kurtovic il y a 9 ans
Parent
révision
1830304c6c
5 fichiers modifiés avec 231 ajouts et 32 suppressions
  1. +3
    -2
      src/assembler/errors.c
  2. +4
    -1
      src/assembler/errors.h
  3. +25
    -15
      src/assembler/inst_args.h
  4. +196
    -14
      src/assembler/instructions.c
  5. +3
    -0
      src/assembler/tokenizer.c

+ 3
- 2
src/assembler/errors.c Voir le fichier

@@ -50,8 +50,9 @@ static const char *error_descs[] = {
[ED_PS_OP_UNKNOWN] = "unknown opcode mnemonic",
[ED_PS_TOO_FEW_ARGS] = "too few arguments for opcode",
[ED_PS_TOO_MANY_ARGS] = "too many arguments for opcode",
// [ED_PS_ARG1_NEED_REG] = "invalid type for first argument (needs register)",
// [ED_PS_ARG1_BAD_REG] = "first argument should be a register",
[ED_PS_ARG_SYNTAX] = "invalid syntax in argument(s)",
[ED_PS_ARG0_TYPE] = "invalid type for first argument",
[ED_PS_ARG0_BAD_REG] = "unsupported register in first argument"
};

/* Internal structs */


+ 4
- 1
src/assembler/errors.h Voir le fichier

@@ -48,7 +48,10 @@ typedef enum {
ED_PS_OP_INVALID,
ED_PS_OP_UNKNOWN,
ED_PS_TOO_FEW_ARGS,
ED_PS_TOO_MANY_ARGS
ED_PS_TOO_MANY_ARGS,
ED_PS_ARG_SYNTAX,
ED_PS_ARG0_TYPE,
ED_PS_ARG0_BAD_REG
} ASMErrorDesc;

/* Structs */


+ 25
- 15
src/assembler/inst_args.h Voir le fichier

@@ -22,20 +22,32 @@ typedef enum {
REG_AF_, REG_IXH, REG_IXL, REG_IYH, REG_IYL
} ASMArgRegister;

/*
    Bit flags naming the kinds of immediate operand. Each flag occupies its
    own bit, so several of them can be OR-ed together into one mask.
*/
typedef enum {
    IMM_U16 = 1 << 0,  // unsigned 16-bit: [0, 65535]
    IMM_U8  = 1 << 1,  // unsigned 8-bit: [0, 255]
    IMM_S8  = 1 << 2,  // signed 8-bit: [-128, 127]
    IMM_REL = 1 << 3,  // relative offset: [-126, 129]
    IMM_BIT = 1 << 4,  // bit index: [0, 7]
    IMM_RST = 1 << 5,  // RST page 0 addr: {0x00, 0x08, 0x10, 0x18, ..., 0x38}
    IMM_IM  = 1 << 6   // interrupt mode: [0, 2]
} ASMArgImmType;

/* An immediate (literal numeric) instruction argument. */
typedef struct {
// Presumably the OR of every ASMArgImmType flag this value is valid for -
// TODO confirm against the argument parser.
ASMArgImmType mask;
// The literal's 16-bit value.
uint16_t value;
// NOTE(review): looks like a sign flag kept separately because `value` is
// unsigned - confirm how negative literals are encoded.
bool is_negative;
} ASMArgImmediate;

/*
An indirect argument, written "(register)" or "(immediate)" in the grammar.

NOTE(review): this struct carries two overlapping representations of the
address (is_reg/reg/imm and type/addr). The INC parser in instructions.c
reads only `type` and `addr.reg`; confirm whether the first three members
are still needed.
*/
typedef struct {
bool is_reg;
ASMArgRegister reg;
ASMArgImmediate imm;
// Tag for `addr`; the INC parser compares it against AT_REGISTER before
// reading addr.reg.
ASMArgType type;
// The address operand; which member is valid is presumably selected by
// `type` - TODO confirm.
union {
ASMArgRegister reg;
ASMArgImmediate imm;
} addr;
} ASMArgIndirect;

/* An indexed argument, written "(IX + n)" or "(IY + n)" in the grammar. */
typedef struct {
// The index register; INST_INDEX_PREFIX() in instructions.c derives the
// instruction prefix byte from it.
ASMArgRegister reg;
// NOTE(review): appears to overlap with `offset` below - confirm whether
// this member is still used.
ASMArgImmediate imm;
// Signed displacement; the INC parser emits it as the instruction's third
// byte.
int8_t offset;
} ASMArgIndexed;

typedef char* ASMArgLabel;
@@ -44,16 +56,14 @@ typedef enum {
COND_NZ, COND_N, COND_NC, COND_C, COND_PO, COND_PE, COND_P, COND_M
} ASMArgCondition;

/*
    Payload of an instruction argument. Exactly one member is meaningful at a
    time; the ASMArgType tag stored alongside it in ASMInstArg says which.
*/
typedef union {
    ASMArgRegister reg;
    ASMArgImmediate imm;
    ASMArgIndirect indirect;
    ASMArgIndexed index;
    ASMArgLabel label;
    ASMArgCondition cond;
} ASMArgData;

/*
    A single parsed instruction argument: a type tag plus its payload.

    The payload is declared once, through ASMArgData; a struct may not contain
    two members with the same name, so there must be only one `data` member.
    Access paths such as `arg.data.reg` and `arg.data.indirect.addr.reg` are
    unchanged.
*/
typedef struct {
    ASMArgType type;
    ASMArgData data;
} ASMInstArg;

+ 196
- 14
src/assembler/instructions.c Voir le fichier

@@ -1,7 +1,11 @@
/* Copyright (C) 2014-2015 Ben Kurtovic <ben.kurtovic@gmail.com>
Released under the terms of the MIT License. See LICENSE for details. */

#include <stdarg.h>
#include <stdlib.h>

#include "instructions.h"
#include "inst_args.h"
#include "../logging.h"

/*
@@ -11,17 +15,17 @@
- http://www.z80.info/z80code.txt
- http://www.z80.info/z80href.txt

instruction := mnemonic [arg[, arg[, arg]]]
mnemonic := [a-z0-9]{2-4}
arg := register | immediate | label | indirect | indexed | condition | page0
inst := mnemonic [arg[, arg[, arg]]]
mnemonic := [a-z0-9]{2-4}
arg := register | immediate | indirect | indexed | label | condition

register := A | B | C | D | E | AF | BC | DE | HL | F | I | IX | IY | PC | R | SP
immediate := 8-bit integer | 16-bit integer
label := string
register := A | B | C | D | E | AF | BC | DE | HL | H | L | F | I | IX |
IY | PC | R | SP | AF' | IXH | IXL | IYH | IYL
immediate := 16-bit integer
indirect := \( (register | immediate) \)
indexed := \( (IX | IY) + immediate \)
label := string
condition := NZ | N | NC | C | PO | PE | P | M
page0 := $0 | $8 | $10 | $18 | $20 | $28 | $30 | $38
*/

/* Helper macros for get_inst_parser() */
@@ -37,24 +41,201 @@

#define HANDLE(m) if (key == MAKE_CMP_(#m)) return parse_inst_##m;

/* Instruction parser functions */
/* Internal helper macros for instruction parsers */

/* Store `len` in *length and allocate `len` bytes into *bytes, invoking
OUT_OF_MEMORY() if malloc() fails. Expands to bare statements that use the
enclosing function's `bytes` and `length` parameters. */
#define INST_ALLOC_(len) \
*length = len; \
if (!(*bytes = malloc(sizeof(uint8_t) * (len)))) \
OUT_OF_MEMORY()

/* INST_SET_ stores `val` at index `b` of *bytes. INST_SETn_ stores its n
arguments at indices 0 through n-1, chained with the comma operator so the
whole thing is a single expression. */
#define INST_SET_(b, val) ((*bytes)[b] = val)
#define INST_SET1_(b1) INST_SET_(0, b1)
#define INST_SET2_(b1, b2) INST_SET1_(b1), INST_SET_(1, b2)
#define INST_SET3_(b1, b2, b3) INST_SET2_(b1, b2), INST_SET_(2, b3)
#define INST_SET4_(b1, b2, b3, b4) INST_SET3_(b1, b2, b3), INST_SET_(3, b4)

/* Expands to its fifth argument. INST_FILL_BYTES_ uses this to select an
INST_SETn_ macro according to how many byte values were passed. */
#define INST_DISPATCH_(a, b, c, d, target, ...) target

/* Write the given byte values into *bytes: through fill_bytes_variadic() when
`len` exceeds four, otherwise through the INST_SETn_ macro matching the number
of values. Note that the INST_SETn_ choice depends on the argument count, not
on `len`. */
#define INST_FILL_BYTES_(len, ...) \
((len > 4) ? fill_bytes_variadic(*bytes, len, __VA_ARGS__) : \
INST_DISPATCH_(__VA_ARGS__, INST_SET4_, INST_SET3_, INST_SET2_, \
INST_SET1_, __VA_ARGS__)(__VA_ARGS__));

/* Prefix byte for an index register: 0xDD for IX, IXH and IXL; 0xFD for any
other register value (callers are presumably expected to pass only IX- or
IY-family registers - TODO confirm). */
#define INST_PREFIX_(reg) \
(((reg) == REG_IX || (reg) == REG_IXH || (reg) == REG_IXL) ? 0xDD : 0xFD)

/* Helper macros for instruction parsers */

/* Open the definition of the static parser function parse_inst_<mnemonic>.
The parameter names declared here (bytes, length, symbol, arg, size) are
referenced by the other INST_* macros. */
#define INST_FUNC(mnemonic) \
static ASMErrorDesc parse_inst_##mnemonic( \
uint8_t **bytes, size_t *length, char **symbol, const char *arg, size_t size)

/* Return the error code ED_PS_<desc> from the enclosing parser function. */
#define INST_ERROR(desc) return ED_PS_##desc;

/* Fail with TOO_MANY_ARGS if an argument string was supplied; the cast marks
`size` as intentionally unused. */
#define INST_TAKES_NO_ARGS \
if (arg) \
INST_ERROR(TOO_MANY_ARGS) \
(void) size;

/* Declare `args`, `nargs` and `err` in the enclosing function, parse the
argument string with parse_args(), and fail unless the argument count lies in
[lo, hi].
NOTE(review): `nargs` has no initializer here, so parse_args() must assign
*nargs before reading it. */
#define INST_TAKES_ARGS(lo, hi) \
if (!arg) \
INST_ERROR(TOO_FEW_ARGS) \
ASMInstArg args[3]; \
size_t nargs; \
ASMErrorDesc err = parse_args(args, &nargs, arg, size); \
if (err) \
return err; \
if (nargs < lo) \
INST_ERROR(TOO_FEW_ARGS) \
if (nargs > hi) \
INST_ERROR(TOO_MANY_ARGS)

/* The n-th parsed argument; requires INST_TAKES_ARGS earlier in the function. */
#define INST_ARG(n) (args[n])

/* Prefix byte (see INST_PREFIX_) for argument n treated as a plain register. */
#define INST_REG_PREFIX(n) INST_PREFIX_(INST_ARG(n).data.reg)

/* Prefix byte for argument n treated as an indexed operand. */
#define INST_INDEX_PREFIX(n) INST_PREFIX_(INST_ARG(n).data.index.reg)

/* Prefix byte for argument n treated as an indirect register operand. */
#define INST_IND_PREFIX(n) INST_PREFIX_(INST_ARG(n).data.indirect.addr.reg)

/* Allocate `len` bytes, fill them with the given values and return ED_NONE.
`symbol` is left untouched. */
#define INST_RETURN(len, ...) { \
(void) symbol; \
INST_ALLOC_(len) \
INST_FILL_BYTES_(len, __VA_ARGS__) \
return ED_NONE; \
}

/* Like INST_RETURN, but first stores a strdup()'d copy of `label` in *symbol
and fills only the first len - 2 bytes (presumably the final two bytes are
left for the symbol's resolved address - TODO confirm). */
#define INST_RETURN_WITH_SYMBOL(len, label, ...) { \
*symbol = strdup(label); \
if (!(*symbol)) \
OUT_OF_MEMORY() \
INST_ALLOC_(len) \
INST_FILL_BYTES_(len - 2, __VA_ARGS__) \
return ED_NONE; \
}

/*
    Copy a variable-length list of byte values into an instruction buffer.

    The `len` values that follow the named parameters are written to
    bytes[0] through bytes[len - 1] in order. INST_FILL_BYTES_ only routes
    instructions longer than four bytes here (the sole such case being the
    fake "emu" debugging/testing opcode), so ordinary instructions never reach
    this function.

    The last byte written is returned so that a call to this function has a
    value, as an INST_SETn_ expansion does.
*/
static uint8_t fill_bytes_variadic(uint8_t *bytes, size_t len, ...)
{
    va_list ap;
    size_t pos = 0;

    va_start(ap, len);
    while (pos < len) {
        bytes[pos] = va_arg(ap, unsigned);
        pos++;
    }
    va_end(ap);

    return bytes[len - 1];
}

/*
    Parse one instruction argument (the `size` bytes starting at `str`) into
    an ASMInstArg object.

    Not implemented yet: the argument text is logged and every input is
    rejected with ED_PS_ARG_SYNTAX. Once implemented, the function is meant to
    return ED_NONE (0) on success or an error code on failure.
*/
static ASMErrorDesc parse_arg(
    ASMInstArg *arg, const char *str, size_t size, char **symbol)
{
    // TODO
    (void) arg;     // outputs are untouched until the parser exists
    (void) symbol;
    DEBUG("parse_arg(): -->%.*s<--", (int) size, str)
    return ED_PS_ARG_SYNTAX;
}

/*
    Parse an argument string into ASMInstArg objects.

    Only the first `size` bytes of `str` are examined. Arguments are separated
    by a comma, optionally followed by a single space; each one is handed to
    parse_arg(). At most three arguments are accepted, and they are stored in
    `args` in order of appearance.

    On return, *nargs holds the number of arguments parsed so far. It is
    always assigned, so callers may pass an uninitialized counter.

    Return ED_NONE (0) on success or an error code on failure:
    ED_PS_TOO_MANY_ARGS if text follows a third argument, ED_PS_ARG_SYNTAX if
    a character outside [a-z0-9 +-()_.] is found, or whatever parse_arg()
    reports.
*/
static ASMErrorDesc parse_args(
    ASMInstArg args[3], size_t *nargs, const char *str, size_t size)
{
    ASMErrorDesc err;
    // NOTE(review): static storage means this pointer is shared by all calls
    // and is never handed back to the caller - confirm this is intended.
    static char *symbol = NULL;
    size_t start = 0, i = 0;

    // The caller's counter is not initialized, so reset it before it is used
    // as an index into args.
    *nargs = 0;

    while (i < size) {
        char c = str[i];
        if (c == ',') {
            if ((err = parse_arg(&args[*nargs], str + start, i - start, &symbol)))
                return err;
            (*nargs)++;

            i++;
            if (i < size && str[i] == ' ')
                i++;  // Skip the single space allowed after a comma
            start = i;
            // Checked before the next parse_arg() call so that args[3] is
            // never written.
            if (*nargs >= 3 && i < size)
                return ED_PS_TOO_MANY_ARGS;
        } else {
            if ((c >= 'a' && c <= 'z') || (c >= '0' && c <= '9') ||
                    c == ' ' || c == '+' || c == '-' || c == '(' || c == ')' ||
                    c == '_' || c == '.')
                i++;
            else
                return ED_PS_ARG_SYNTAX;
        }
    }

    // The last argument has no trailing comma, so it is parsed (and counted)
    // here rather than inside the loop.
    if (i > start) {
        if ((err = parse_arg(&args[*nargs], str + start, i - start, &symbol)))
            return err;
        (*nargs)++;
    }
    return ED_NONE;
}

/* Instruction parser functions */

INST_FUNC(nop)
{
DEBUG("dispatched to -> NOP")
return ED_PS_TOO_FEW_ARGS;
INST_TAKES_NO_ARGS
INST_RETURN(1, 0x00)
}

INST_FUNC(inc)
{
DEBUG("dispatched to -> INC")
return ED_PS_TOO_FEW_ARGS;
INST_TAKES_ARGS(1, 1)
switch (INST_ARG(0).type) {
case AT_REGISTER:
switch (INST_ARG(0).data.reg) {
case REG_A: INST_RETURN(1, 0x3C)
case REG_B: INST_RETURN(1, 0x04)
case REG_C: INST_RETURN(1, 0x0C)
case REG_D: INST_RETURN(1, 0x14)
case REG_E: INST_RETURN(1, 0x1C)
case REG_H: INST_RETURN(1, 0x24)
case REG_L: INST_RETURN(1, 0x2C)
case REG_BC: INST_RETURN(1, 0x03)
case REG_DE: INST_RETURN(1, 0x13)
case REG_HL: INST_RETURN(1, 0x23)
case REG_SP: INST_RETURN(1, 0x33)
case REG_IX: INST_RETURN(2, 0xDD, 0x23)
case REG_IY: INST_RETURN(2, 0xFD, 0x23)
case REG_IXH: INST_RETURN(2, 0xDD, 0x2C)
case REG_IXL: INST_RETURN(2, 0xFD, 0x2C)
case REG_IYH: INST_RETURN(2, 0xDD, 0x2C)
case REG_IYL: INST_RETURN(2, 0xFD, 0x2C)
default: INST_ERROR(ARG0_BAD_REG)
}
case AT_INDIRECT:
if (INST_ARG(0).data.indirect.type != AT_REGISTER)
INST_ERROR(ARG0_TYPE)
if (INST_ARG(0).data.indirect.addr.reg != REG_HL)
INST_ERROR(ARG0_BAD_REG)
INST_RETURN(2, 0x34)
case AT_INDEXED:
INST_RETURN(3, INST_INDEX_PREFIX(0), 0x34, INST_ARG(0).data.index.offset)
default:
INST_ERROR(ARG0_TYPE)
}
}

/*
INST_FUNC(add)
{
DEBUG("dispatched to -> ADD")
@@ -66,6 +247,7 @@ INST_FUNC(adc)
DEBUG("dispatched to -> ADC")
return ED_PS_TOO_FEW_ARGS;
}
*/

/*
Return the relevant ASMInstParser function for a given mnemonic.
@@ -82,8 +264,8 @@ ASMInstParser get_inst_parser(char mstr[MAX_MNEMONIC_SIZE])

HANDLE(nop)
HANDLE(inc)
HANDLE(add)
HANDLE(adc)
// HANDLE(add)
// HANDLE(adc)

return NULL;
}

+ 3
- 0
src/assembler/tokenizer.c Voir le fichier

@@ -253,6 +253,9 @@ static ErrorInfo* parse_instruction(
if (i < MIN_MNEMONIC_SIZE)
return error_info_create(line, ET_PARSER, ED_PS_OP_TOO_SHORT);

if (i + 1 < line->length)
i++; // Advance past space

uint8_t *bytes;
size_t arglen = line->length - i, length;
char *argstart = arglen > 0 ? line->data + i : NULL, *symbol = NULL;


Chargement…
Annuler
Enregistrer