Parcourir la source

Refactor out tokenizer; start implementing paging knowledge.

master
Ben Kurtovic il y a 9 ans
Parent
révision
b8ad05578f
12 fichiers modifiés avec 284 ajouts et 188 suppressions
  1. +2
    -184
      src/assembler.c
  2. +6
    -4
      src/assembler/directives.h
  3. +1
    -0
      src/assembler/errors.c
  4. +1
    -0
      src/assembler/errors.h
  5. +12
    -0
      src/assembler/parse_util.c
  6. +1
    -0
      src/assembler/parse_util.h
  7. +4
    -0
      src/assembler/preprocessor.c
  8. +1
    -0
      src/assembler/state.c
  9. +1
    -0
      src/assembler/state.h
  10. +241
    -0
      src/assembler/tokenizer.c
  11. +11
    -0
      src/assembler/tokenizer.h
  12. +3
    -0
      src/disassembler.c

+ 2
- 184
src/assembler.c Voir le fichier

@@ -4,198 +4,15 @@
#include <stdlib.h>

#include "assembler.h"
#include "assembler/directives.h"
#include "assembler/errors.h"
#include "assembler/io.h"
#include "assembler/parse_util.h"
#include "assembler/preprocessor.h"
#include "assembler/state.h"
#include "assembler/tokenizer.h"
#include "logging.h"
#include "rom.h"
#include "util.h"

/* Sentinel values for overlap table. Only their addresses are used:
   tokenize() fills the header region of the table with &header_sentinel, and
   check_layout() uses &bounds_sentinel as the "clash" for an out-of-range
   location; both are compared by pointer to select the error description. */
const ASMLine header_sentinel, bounds_sentinel;

/*
    Add a given line, representing a label, to the symbol table.

    The symbol name is a copy of the line's data without its final character
    (presumably the trailing label marker -- confirm against the preprocessor).
    On success, the name and the new ASMSymbol are handed to
    asm_symtable_insert().

    Return NULL on success and an ErrorInfo object on failure (in the case of
    duplicate labels). The ErrorInfo references both the offending line and the
    line of the earlier definition.
*/
static ErrorInfo* add_label_to_table(
    ASMSymbolTable *symtable, const ASMLine *line, size_t offset)
{
    char *symbol = strndup(line->data, line->length - 1);
    if (!symbol)
        OUT_OF_MEMORY()

    const ASMSymbol *current = asm_symtable_find(symtable, symbol);
    if (current) {
        ErrorInfo *ei = error_info_create(line, ET_SYMBOL, ED_SYM_DUPE_LABELS);
        error_info_append(ei, current->line);
        // The copied name was only needed for the lookup; nothing else holds
        // a reference to it, so release it before bailing out.
        free(symbol);
        return ei;
    }

    ASMSymbol *label = malloc(sizeof(ASMSymbol));
    if (!label)
        OUT_OF_MEMORY()

    // TODO: don't assume all ROM gets mapped to slot 2
    // Offsets at or above 0xC000 are folded into the 16 KiB window starting
    // at 0x8000; lower offsets are used as-is.
    if (offset >= 0xC000)
        label->offset = (offset & 0x3FFF) + 0x8000;
    else
        label->offset = offset;
    label->symbol = symbol;
    label->line = line;
    asm_symtable_insert(symtable, label);
    return NULL;
}

/*
    Parse data encoded in a line into an ASMData object.

    On success, return NULL and store the instruction in *data_ptr. On failure,
    return an ErrorInfo object; *data_ptr is not modified.

    Not implemented yet: the current stub logs the line and always fails with
    a parser syntax error.
*/
static ErrorInfo* parse_data(
    const ASMLine *line, ASMData **data_ptr, size_t offset)
{
    // TODO
    // Referenced only to keep unused-parameter warnings quiet until the
    // parser is written.
    (void) data_ptr;
    (void) offset;

    DEBUG("parse_data(): %.*s", (int) line->length, line->data)

    return error_info_create(line, ET_PARSER, ED_PARSE_SYNTAX);
}

/*
    Parse an instruction encoded in a line into an ASMInstruction object.

    On success, return NULL and store the instruction in *inst_ptr. On failure,
    return an ErrorInfo object; *inst_ptr is not modified.

    Not implemented yet: the current stub logs the line and always fails with
    a parser syntax error.
*/
static ErrorInfo* parse_instruction(
    const ASMLine *line, ASMInstruction **inst_ptr, size_t offset)
{
    // TODO
    // Referenced only to keep unused-parameter warnings quiet until the
    // parser is written.
    (void) inst_ptr;
    (void) offset;

    DEBUG("parse_instruction(): %.*s", (int) line->length, line->data)

    return error_info_create(line, ET_PARSER, ED_PARSE_SYNTAX);
}

/*
    Check if the given location overlaps with any existing objects.

    overlap_table has one entry per ROM byte (size entries); each non-NULL
    entry is the line that claimed that byte, or one of the sentinels. loc is
    the span being placed, line is the line it came from, and origin (may be
    NULL) is appended to any error for context.

    On success, return NULL and add the location to the overlap table.
    On failure, return an ErrorInfo object; the table is left untouched.
*/
static ErrorInfo* check_layout(
    const ASMLine **overlap_table, size_t size, const ASMLocation *loc,
    const ASMLine *line, const ASMLine *origin)
{
    const ASMLine *clash = NULL;

    // Two comparisons instead of (offset + length > size): the sum could wrap
    // around and slip past the check, after which the table would be indexed
    // out of bounds.
    if (loc->offset > size || loc->length > size - loc->offset) {
        clash = &bounds_sentinel;
    } else {
        for (size_t i = 0; i < loc->length; i++) {
            if (overlap_table[loc->offset + i]) {
                clash = overlap_table[loc->offset + i];
                break;
            }
        }
    }

    if (clash) {
        ErrorInfo *ei = error_info_create(line, ET_LAYOUT,
            (clash == &header_sentinel) ? ED_LYT_OVERLAP_HEAD :
            (clash == &bounds_sentinel) ? ED_LYT_BOUNDS : ED_LYT_OVERLAP);

        if (origin)
            error_info_append(ei, origin);
        // Sentinels are not real source lines, so only a genuine clashing
        // line is attached to the error.
        if (clash != &header_sentinel && clash != &bounds_sentinel)
            error_info_append(ei, clash);
        return ei;
    }

    for (size_t i = 0; i < loc->length; i++)
        overlap_table[loc->offset + i] = line;
    return NULL;
}

/*
    Tokenize ASMLines into ASMInstructions and ASMData.

    Each line in state->lines is handled in order: labels go into the symbol
    table, the origin directive moves the current offset, other local
    directives are parsed as data, and everything else is parsed as an
    instruction. Every parsed object is checked against an overlap table that
    starts out with the ROM header region reserved.

    NULL is returned on success and an ErrorInfo object is returned on failure.
    state->instructions, state->data, and state->symtable may or may not be
    modified regardless of success.
*/
static ErrorInfo* tokenize(AssemblerState *state)
{
    size_t size = ROM_SIZE_MAX;
    if (state->rom_size)
        size = state->rom_size;

    const ASMLine **overlap_table = calloc(size, sizeof(const ASMLine*));
    if (!overlap_table)
        OUT_OF_MEMORY()

    // Dummy list heads let the first real node be appended like any other.
    ASMInstruction inst_head = {.next = NULL};
    ASMInstruction *inst_tail = &inst_head, *inst;
    ASMData data_head = {.next = NULL};
    ASMData *data_tail = &data_head, *data;
    const ASMLine *origin = NULL;
    ErrorInfo *err = NULL;
    size_t offset = 0;

    for (size_t i = 0; i < HEADER_SIZE; i++)
        overlap_table[state->header.offset + i] = &header_sentinel;

    for (const ASMLine *line = state->lines; line; line = line->next) {
        if (line->is_label) {
            err = add_label_to_table(state->symtable, line, offset);
            if (err)
                goto finish;
        }
        else if (!IS_LOCAL_DIRECTIVE(line)) {
            err = parse_instruction(line, &inst, offset);
            if (err)
                goto finish;

            offset += inst->loc.length;
            inst_tail->next = inst;
            inst_tail = inst;

            err = check_layout(overlap_table, size, &inst->loc, line, origin);
            if (err)
                goto finish;
        }
        else if (IS_DIRECTIVE(line, DIR_ORIGIN)) {
            if (!DIRECTIVE_HAS_ARG(line, DIR_ORIGIN)) {
                err = error_info_create(line, ET_PREPROC, ED_PP_NO_ARG);
                goto finish;
            }

            uint32_t arg;
            if (!dparse_uint32_t(&arg, line, DIR_ORIGIN)) {
                err = error_info_create(line, ET_PREPROC, ED_PP_BAD_ARG);
                goto finish;
            }

            // Remember the directive so layout errors can point back at it.
            offset = arg;
            origin = line;
        }
        else {
            err = parse_data(line, &data, offset);
            if (err)
                goto finish;

            offset += data->loc.length;
            data_tail->next = data;
            data_tail = data;

            err = check_layout(overlap_table, size, &data->loc, line, origin);
            if (err)
                goto finish;
        }
    }

finish:
    // Whatever was built so far is published, even on failure.
    state->instructions = inst_head.next;
    state->data = data_head.next;
    free(overlap_table);
    return err;
}

/*
Return the smallest ROM size that can contain the given address.

@@ -203,6 +20,7 @@ static ErrorInfo* tokenize(AssemblerState *state)
*/
static size_t bounding_rom_size(size_t size)
{
size--;
size |= size >> 1;
size |= size >> 2;
size |= size >> 4;


+ 6
- 4
src/assembler/directives.h Voir le fichier

@@ -6,7 +6,7 @@
#include <string.h>

#define DIRECTIVE_MARKER '.'
#define NUM_DIRECTIVES 14
#define NUM_DIRECTIVES 16

#define DIR_INCLUDE ".include"

@@ -18,8 +18,10 @@
#define DIR_ROM_VERSION ".rom_version"
#define DIR_ROM_REGION ".rom_region"
#define DIR_ROM_DECLSIZE ".rom_declsize"
#define DIR_CROSS_BLOCKS ".cross_blocks"

#define DIR_ORIGIN ".org"
#define DIR_BLOCK ".block"
#define DIR_BYTE ".byte"
#define DIR_ASCII ".ascii"
#define DIR_ASCIZ ".asciz"
@@ -33,9 +35,9 @@
(!DIRECTIVE_HAS_ARG(line, d) || (line)->data[strlen(d)] == ' '))

#define IS_LOCAL_DIRECTIVE(line) \
(IS_DIRECTIVE(line, DIR_ORIGIN) || IS_DIRECTIVE(line, DIR_BYTE) || \
IS_DIRECTIVE(line, DIR_ASCII) || IS_DIRECTIVE(line, DIR_ASCIZ) || \
IS_DIRECTIVE(line, DIR_ASCIIZ))
(IS_DIRECTIVE(line, DIR_ORIGIN) || IS_DIRECTIVE(line, DIR_BLOCK) || \
IS_DIRECTIVE(line, DIR_BYTE) || IS_DIRECTIVE(line, DIR_ASCII) || \
IS_DIRECTIVE(line, DIR_ASCIZ) || IS_DIRECTIVE(line, DIR_ASCIIZ))

/* Evaluates to strlen(d) when DIRECTIVE_HAS_ARG(line, d) holds, i.e. the
   index in the line's data just past the directive name d, and to 0
   otherwise. Note that both line and d may be evaluated more than once. */
#define DIRECTIVE_OFFSET(line, d) \
(DIRECTIVE_HAS_ARG(line, d) ? strlen(d) : 0)


+ 1
- 0
src/assembler/errors.c Voir le fichier

@@ -34,6 +34,7 @@ static const char *asm_error_descs[] = {

"header offset exceeds given ROM size", // ED_LYT_HEADER_RANGE
"declared ROM size in header exceeds actual size", // ED_LYT_DECLARE_RANGE
"block zero cannot be mapped into a nonzero slot", // ED_LYT_BLOCK0
"location is out of bounds for the ROM size", // ED_LYT_BOUNDS
"location overlaps with instruction or data", // ED_LYT_OVERLAP
"location overlaps with ROM header", // ED_LYT_OVERLAP_HEAD


+ 1
- 0
src/assembler/errors.h Voir le fichier

@@ -30,6 +30,7 @@ typedef enum {

ED_LYT_HEADER_RANGE,
ED_LYT_DECLARE_RANGE,
ED_LYT_BLOCK0,
ED_LYT_BOUNDS,
ED_LYT_OVERLAP,
ED_LYT_OVERLAP_HEAD,


+ 12
- 0
src/assembler/parse_util.c Voir le fichier

@@ -130,6 +130,18 @@ bool parse_string(char **result, size_t *length, const char *arg, ssize_t size)
}

/*
    Read in a space-separated sequence of bytes and store it in *result.

    *length is also updated to the number of bytes in the array. *result must
    be free()'d when finished.

    Not implemented yet: the current stub always returns false and leaves
    *result and *length untouched.
*/
bool parse_bytes(uint8_t **result, size_t *length, const char *arg, ssize_t size)
{
    // TODO
    // Referenced only to keep unused-parameter warnings quiet until the
    // parser is written.
    (void) result;
    (void) length;
    (void) arg;
    (void) size;
    return false;
}

/*
Read in a boolean argument from the given line and store it in *result.
*/
DIRECTIVE_PARSE_FUNC(bool, bool)


+ 1
- 0
src/assembler/parse_util.h Voir le fichier

@@ -15,6 +15,7 @@
bool parse_bool(bool*, const char*, ssize_t);
bool parse_uint32_t(uint32_t*, const char*, ssize_t);
bool parse_string(char**, size_t*, const char*, ssize_t);
bool parse_bytes(uint8_t**, size_t*, const char*, ssize_t);

bool dparse_bool(bool*, const ASMLine*, const char*);
bool dparse_uint32_t(uint32_t*, const ASMLine*, const char*);


+ 4
- 0
src/assembler/preprocessor.c Voir le fichier

@@ -479,6 +479,10 @@ ErrorInfo* preprocess(AssemblerState *state, const LineBuffer *source)
SAVE_LINE(rom_declsize_line)
END_DIRECTIVE

BEGIN_DIRECTIVE(DIR_CROSS_BLOCKS, bool, state->cross_blocks, false)
USE_PARSER(bool)
END_DIRECTIVE

END_DIRECTIVE_BLOCK

// Remove directive from lines, and schedule it for deletion:


+ 1
- 0
src/assembler/state.c Voir le fichier

@@ -19,6 +19,7 @@ void state_init(AssemblerState *state)
state->header.region = DEFAULT_REGION;
state->header.rom_size = DEFAULT_DECLSIZE;
state->optimizer = false;
state->cross_blocks = false;
state->rom_size = 0;

state->lines = NULL;


+ 1
- 0
src/assembler/state.h Voir le fichier

@@ -78,6 +78,7 @@ typedef struct {
typedef struct {
ASMHeaderInfo header;
bool optimizer;
bool cross_blocks;
size_t rom_size;
ASMLine *lines;
ASMInclude *includes;


+ 241
- 0
src/assembler/tokenizer.c Voir le fichier

@@ -0,0 +1,241 @@
/* Copyright (C) 2014-2015 Ben Kurtovic <ben.kurtovic@gmail.com>
Released under the terms of the MIT License. See LICENSE for details. */

#include <stdlib.h>
#include <string.h>

#include "tokenizer.h"
#include "directives.h"
#include "parse_util.h"
#include "../logging.h"
#include "../rom.h"

/* Sentinel values for overlap table. Only their addresses are used:
   tokenize() fills the header region of the table with &header_sentinel, and
   check_layout() uses &bounds_sentinel as the "clash" for an out-of-range
   location; both are compared by pointer to select the error description. */
const ASMLine header_sentinel, bounds_sentinel;

/*
    Add a given line, representing a label, to the symbol table.

    The symbol name is a copy of the line's data without its final character
    (presumably the trailing label marker -- confirm against the preprocessor).
    offset is the current ROM offset; slot is the slot the current block is
    mapped into, or negative if none has been given. On success, the name and
    the new ASMSymbol are handed to asm_symtable_insert().

    Return NULL on success and an ErrorInfo object on failure (in the case of
    duplicate labels). The ErrorInfo references both the offending line and the
    line of the earlier definition.
*/
static ErrorInfo* add_label_to_table(
    ASMSymbolTable *symtable, const ASMLine *line, size_t offset, ssize_t slot)
{
    char *symbol = strndup(line->data, line->length - 1);
    if (!symbol)
        OUT_OF_MEMORY()

    const ASMSymbol *current = asm_symtable_find(symtable, symbol);
    if (current) {
        ErrorInfo *ei = error_info_create(line, ET_SYMBOL, ED_SYM_DUPE_LABELS);
        error_info_append(ei, current->line);
        // The copied name was only needed for the lookup; nothing else holds
        // a reference to it, so release it before bailing out.
        free(symbol);
        return ei;
    }

    ASMSymbol *label = malloc(sizeof(ASMSymbol));
    if (!label)
        OUT_OF_MEMORY()

    // Position of the label within its 16 KiB (0x4000-byte) block.
    size_t block_offset = offset & 0x3FFF;
    if (slot >= 0) {
        // Explicit slot: place the block-relative offset inside that slot.
        label->offset = block_offset + slot * 0x4000;
    } else if (offset >= 0xC000) {
        // No slot given and past the first three blocks: fall back to the
        // window starting at 0x8000.
        label->offset = block_offset + 0x8000;
    } else {
        label->offset = offset;
    }
    label->symbol = symbol;
    label->line = line;
    asm_symtable_insert(symtable, label);
    return NULL;
}

/*
    Handle an origin directive by updating the offset and (maybe) the slot.

    The directive's argument is parsed as an unsigned 32-bit integer and
    written to *offset. *slot is currently never written (see the TODO).

    Return NULL on success and an ErrorInfo object on failure: ED_PP_NO_ARG if
    the directive has no argument, ED_PP_BAD_ARG if it cannot be parsed.
    *offset is only modified on success.
*/
static ErrorInfo* handle_origin_directive(
    const ASMLine *line, size_t *offset, ssize_t *slot)
{
    uint32_t new_origin;

    if (!DIRECTIVE_HAS_ARG(line, DIR_ORIGIN))
        return error_info_create(line, ET_PREPROC, ED_PP_NO_ARG);

    if (dparse_uint32_t(&new_origin, line, DIR_ORIGIN)) {
        *offset = new_origin;
        // TODO: if different block, *slot <-- slot lookup table for this block
        return NULL;
    }

    return error_info_create(line, ET_PREPROC, ED_PP_BAD_ARG);
}

/*
    Handle a block directive by updating the offset and slot.

    The directive takes one or two byte arguments: a block number (must be
    below 64) and an optional slot (0 through 2). Block zero may only be given
    slot zero. On success, *offset becomes the start of the block (block
    number times 16 KiB) and *slot becomes the given slot, or -1 if none was
    given.

    Return NULL on success and an ErrorInfo object on failure; *offset and
    *slot are only modified on success.
*/
static ErrorInfo* handle_block_directive(
    const ASMLine *line, size_t *offset, ssize_t *slot)
{
    if (!DIRECTIVE_HAS_ARG(line, DIR_BLOCK))
        return error_info_create(line, ET_PREPROC, ED_PP_NO_ARG);

    // Initialised so the free() below is well-defined even if the parser
    // reports success without producing an array.
    uint8_t *args = NULL;
    size_t dir_offset = DIRECTIVE_OFFSET(line, DIR_BLOCK) + 1, nargs = 0;

    // NOTE(review): assumes parse_bytes() leaves nothing allocated when it
    // returns false -- confirm once it is implemented.
    if (!parse_bytes(&args, &nargs, line->data + dir_offset,
                     line->length - dir_offset))
        return error_info_create(line, ET_PREPROC, ED_PP_BAD_ARG);

    ErrorInfo *ei = NULL;

    if (nargs < 1 || nargs > 2 || args[0] >= 64 || (nargs == 2 && args[1] > 2)) {
        ei = error_info_create(line, ET_PREPROC, ED_PP_BAD_ARG);
    } else if (nargs == 2 && args[0] == 0 && args[1] != 0) {
        ei = error_info_create(line, ET_LAYOUT, ED_LYT_BLOCK0);
    } else {
        *offset = args[0] * (16 << 10);
        *slot = nargs == 2 ? args[1] : -1;
    }

    // Single exit so the parsed array is released on the error paths too.
    free(args);
    return ei;
}

/*
    Parse data encoded in a line into an ASMData object.

    On success, return NULL and store the instruction in *data_ptr. On failure,
    return an ErrorInfo object; *data_ptr is not modified.

    Not implemented yet: the current stub logs the line and always fails with
    a parser syntax error.
*/
static ErrorInfo* parse_data(
    const ASMLine *line, ASMData **data_ptr, size_t offset)
{
    // TODO
    // Referenced only to keep unused-parameter warnings quiet until the
    // parser is written.
    (void) data_ptr;
    (void) offset;

    DEBUG("parse_data(): %.*s", (int) line->length, line->data)

    return error_info_create(line, ET_PARSER, ED_PARSE_SYNTAX);
}

/*
    Parse an instruction encoded in a line into an ASMInstruction object.

    On success, return NULL and store the instruction in *inst_ptr. On failure,
    return an ErrorInfo object; *inst_ptr is not modified.

    Not implemented yet: the current stub logs the line and always fails with
    a parser syntax error.
*/
static ErrorInfo* parse_instruction(
    const ASMLine *line, ASMInstruction **inst_ptr, size_t offset)
{
    // TODO
    // Referenced only to keep unused-parameter warnings quiet until the
    // parser is written.
    (void) inst_ptr;
    (void) offset;

    DEBUG("parse_instruction(): %.*s", (int) line->length, line->data)

    return error_info_create(line, ET_PARSER, ED_PARSE_SYNTAX);
}

/*
    Check if the given location overlaps with any existing objects.

    overlap_table has one entry per ROM byte (size entries); each non-NULL
    entry is the line that claimed that byte, or one of the sentinels. loc is
    the span being placed, line is the line it came from, and origin (may be
    NULL) is appended to any error for context.

    On success, return NULL and add the location to the overlap table.
    On failure, return an ErrorInfo object; the table is left untouched.
*/
static ErrorInfo* check_layout(
    const ASMLine **overlap_table, size_t size, const ASMLocation *loc,
    const ASMLine *line, const ASMLine *origin)
{
    // TODO: never let boundaries cross without state->cross_blocks
    const ASMLine *clash = NULL;

    // Two comparisons instead of (offset + length > size): the sum could wrap
    // around and slip past the check, after which the table would be indexed
    // out of bounds.
    if (loc->offset > size || loc->length > size - loc->offset) {
        clash = &bounds_sentinel;
    } else {
        for (size_t i = 0; i < loc->length; i++) {
            if (overlap_table[loc->offset + i]) {
                clash = overlap_table[loc->offset + i];
                break;
            }
        }
    }

    if (clash) {
        ErrorInfo *ei = error_info_create(line, ET_LAYOUT,
            (clash == &header_sentinel) ? ED_LYT_OVERLAP_HEAD :
            (clash == &bounds_sentinel) ? ED_LYT_BOUNDS : ED_LYT_OVERLAP);

        if (origin)
            error_info_append(ei, origin);
        // Sentinels are not real source lines, so only a genuine clashing
        // line is attached to the error.
        if (clash != &header_sentinel && clash != &bounds_sentinel)
            error_info_append(ei, clash);
        return ei;
    }

    for (size_t i = 0; i < loc->length; i++)
        overlap_table[loc->offset + i] = line;
    return NULL;
}

/*
    Tokenize ASMLines into ASMInstructions and ASMData.

    Each line in state->lines is handled in order: labels go into the symbol
    table, origin and block directives move the current offset (and possibly
    the slot), other local directives are parsed as data, and everything else
    is parsed as an instruction. Every parsed object is checked against an
    overlap table that starts out with the ROM header region reserved.

    NULL is returned on success and an ErrorInfo object is returned on failure.
    state->instructions, state->data, and state->symtable may or may not be
    modified regardless of success.
*/
ErrorInfo* tokenize(AssemblerState *state)
{
    size_t size = ROM_SIZE_MAX;
    if (state->rom_size)
        size = state->rom_size;

    const ASMLine **overlap_table = calloc(size, sizeof(const ASMLine*));
    if (!overlap_table)
        OUT_OF_MEMORY()

    // Dummy list heads let the first real node be appended like any other.
    ASMInstruction inst_head = {.next = NULL};
    ASMInstruction *inst_tail = &inst_head, *inst;
    ASMData data_head = {.next = NULL};
    ASMData *data_tail = &data_head, *data;
    const ASMLine *origin = NULL;
    ErrorInfo *err = NULL;
    size_t offset = 0;
    ssize_t slot = -1;

    for (size_t i = 0; i < HEADER_SIZE; i++)
        overlap_table[state->header.offset + i] = &header_sentinel;

    for (const ASMLine *line = state->lines; line; line = line->next) {
        if (line->is_label) {
            err = add_label_to_table(state->symtable, line, offset, slot);
            if (err)
                goto finish;
        }
        else if (!IS_LOCAL_DIRECTIVE(line)) {
            err = parse_instruction(line, &inst, offset);
            if (err)
                goto finish;

            offset += inst->loc.length;
            inst_tail->next = inst;
            inst_tail = inst;

            err = check_layout(overlap_table, size, &inst->loc, line, origin);
            if (err)
                goto finish;
        }
        else if (IS_DIRECTIVE(line, DIR_ORIGIN)) {
            err = handle_origin_directive(line, &offset, &slot);
            if (err)
                goto finish;
            // Remember the directive so layout errors can point back at it.
            origin = line;
        }
        else if (IS_DIRECTIVE(line, DIR_BLOCK)) {
            err = handle_block_directive(line, &offset, &slot);
            if (err)
                goto finish;
            origin = line;
        }
        else {
            err = parse_data(line, &data, offset);
            if (err)
                goto finish;

            offset += data->loc.length;
            data_tail->next = data;
            data_tail = data;

            err = check_layout(overlap_table, size, &data->loc, line, origin);
            if (err)
                goto finish;
        }
    }

finish:
    // Whatever was built so far is published, even on failure.
    state->instructions = inst_head.next;
    state->data = data_head.next;
    free(overlap_table);
    return err;
}

+ 11
- 0
src/assembler/tokenizer.h Voir le fichier

@@ -0,0 +1,11 @@
/* Copyright (C) 2014-2015 Ben Kurtovic <ben.kurtovic@gmail.com>
Released under the terms of the MIT License. See LICENSE for details. */

#pragma once

#include "state.h"
#include "errors.h"

/* Functions */

/* Tokenize the lines held in the given assembler state into instructions and
   data. Returns NULL on success and an ErrorInfo object on failure; the
   state's instructions, data, and symbol table may be modified either way. */
ErrorInfo* tokenize(AssemblerState*);

+ 3
- 0
src/disassembler.c Voir le fichier

@@ -12,5 +12,8 @@
*/
bool disassemble_file(const char *src_path, const char *dst_path)
{
    // TODO
    // Placeholder: nothing is read or written yet. Both paths are referenced
    // only so they do not trigger unused-parameter warnings, and success is
    // reported unconditionally.
    (void) dst_path;
    (void) src_path;

    return true;
}

Chargement…
Annuler
Enregistrer