From 22b771d39ad3362566391aabf524d18c054288ca Mon Sep 17 00:00:00 2001 From: Ben Kurtovic Date: Sun, 5 Apr 2015 02:15:18 -0500 Subject: [PATCH] Lots of work on assembler internal structure; refactor. --- src/assembler.c | 373 ++++++++++++++++++++++++++++++++++++++++++++++++++------ src/assembler.h | 12 +- src/rom.c | 16 +-- src/util.c | 49 ++++++++ src/util.h | 2 + 5 files changed, 397 insertions(+), 55 deletions(-) diff --git a/src/assembler.c b/src/assembler.c index e5d8d12..e16ab26 100644 --- a/src/assembler.c +++ b/src/assembler.c @@ -3,21 +3,90 @@ #include #include -#include #include #include #include "assembler.h" #include "logging.h" +#include "util.h" + +#define DEFAULT_HEADER_OFFSET 0x7FF0 +#define DEFAULT_REGION "GG Export" + +#define SYMBOL_TABLE_BUCKETS 128 + +/* Internal structs */ + +struct ASMLine { + char *data; + size_t length; + const Line *original; + const char *filename; + struct ASMLine *next; + struct ASMLine *include; +}; +typedef struct ASMLine ASMLine; + +struct ASMInclude { + LineBuffer *lines; + struct ASMInclude *next; +}; +typedef struct ASMInclude ASMInclude; + +struct ASMInstruction { + size_t offset; + uint8_t length; + uint8_t b1, b2, b3, b4; + uint8_t virtual_byte; + char *symbol; + struct ASMInstruction *next; +}; +typedef struct ASMInstruction ASMInstruction; + +struct ASMSymbol { + size_t offset; + char *symbol; + struct ASMSymbol *next; +}; +typedef struct ASMSymbol ASMSymbol; + +typedef struct { + ASMSymbol *buckets[SYMBOL_TABLE_BUCKETS]; +} ASMSymbolTable; + +typedef struct { + size_t offset; + bool checksum; + uint32_t product_code; + uint8_t version; + uint8_t region; + uint8_t rom_size; +} ASMHeaderInfo; + +typedef struct { + ASMHeaderInfo header; + bool optimizer; + size_t rom_size; + ASMLine *lines; + ASMInclude *includes; + ASMInstruction *instructions; + ASMSymbolTable *symtable; +} AssemblerState; /* Deallocate a LineBuffer previously created with read_source_file(). 
*/ static void free_line_buffer(LineBuffer *buffer) { - for (size_t i = 0; i < buffer->length; i++) - free(buffer->lines[i].data); - free(buffer->lines); + Line *line = buffer->lines, *temp; + while (line) { + temp = line->next; + free(line->data); + free(line); + line = temp; + } + + free(buffer->filename); free(buffer); } @@ -30,7 +99,7 @@ static void free_line_buffer(LineBuffer *buffer) */ static LineBuffer* read_source_file(const char *path) { - FILE* fp; + FILE *fp; struct stat st; if (!(fp = fopen(path, "r"))) { @@ -50,47 +119,52 @@ static LineBuffer* read_source_file(const char *path) return NULL; } - size_t capacity = 16; LineBuffer *source = malloc(sizeof(LineBuffer)); if (!source) OUT_OF_MEMORY() - source->length = 0; - source->lines = malloc(sizeof(Line) * capacity); - if (!source->lines) + source->lines = NULL; + source->filename = malloc(sizeof(char) * (strlen(path) + 1)); + if (!source->filename) OUT_OF_MEMORY() + strcpy(source->filename, path); + + Line dummy = {.next = NULL}; + Line *line, *prev = &dummy; + size_t lineno = 1; while (1) { - char *line = NULL; - size_t lcap = 0; + char *data = NULL; + size_t cap = 0; ssize_t len; - if ((len = getline(&line, &lcap, fp)) < 0) { + if ((len = getline(&data, &cap, fp)) < 0) { if (feof(fp)) break; if (errno == ENOMEM) OUT_OF_MEMORY() ERROR_ERRNO("couldn't read source file") + fclose(fp); + source->lines = dummy.next; free_line_buffer(source); - source = NULL; - break; + return NULL; } - if (capacity <= source->length + 1) { - capacity <<= 2; - source->lines = realloc(source->lines, sizeof(Line) * capacity); - if (!source->lines) - OUT_OF_MEMORY() - } + line = malloc(sizeof(Line)); + if (!line) + OUT_OF_MEMORY() - source->lines[source->length++] = (Line) {line, len}; - if (feof(fp)) { - source->lines[source->length].length--; - break; - } + line->data = data; + line->length = feof(fp) ? 
len : (len - 1); + line->lineno = lineno++; + line->next = NULL; + + prev->next = line; + prev = line; } fclose(fp); + source->lines = dummy.next; return source; } @@ -102,20 +176,29 @@ static LineBuffer* read_source_file(const char *path) */ static bool write_binary_file(const char *path, const uint8_t *data, size_t size) { - // TODO - return false; + FILE *fp; + if (!(fp = fopen(path, "wb"))) { + ERROR_ERRNO("couldn't open destination file") + return false; + } + + if (!fwrite(data, size, 1, fp)) { + fclose(fp); + ERROR_ERRNO("couldn't write to destination file") + return false; + } + + fclose(fp); + return true; } /* - Print an ErrorInfo object returned by assemble() to the given file. - - The same LineBuffer passed to assemble() should be passed to this function. - Passing NULL if it is unavailable will still work, but source code snippets - where errors were noted will not be printed. + Print an ErrorInfo object returned by assemble() to the given stream. */ -void error_info_print(const ErrorInfo *error_info, FILE *file, const LineBuffer *source) +void error_info_print(const ErrorInfo *error_info, FILE *file) { // TODO + fprintf(file, "Error: Unknown error"); } /* @@ -131,6 +214,182 @@ void error_info_destroy(ErrorInfo *error_info) } /* + Initialize default values in an AssemblerState object. +*/ +static void init_state(AssemblerState *state) +{ + state->header.offset = DEFAULT_HEADER_OFFSET; + state->header.checksum = true; + state->header.product_code = 0; + state->header.version = 0; + state->header.region = region_string_to_code(DEFAULT_REGION); + state->header.rom_size = 0; + state->optimizer = false; + state->rom_size = 0; + + state->lines = NULL; + state->includes = NULL; + state->instructions = NULL; + state->symtable = NULL; +} + +/* + Deallocate an ASMLine list. 
+*/ +static void free_asm_lines(ASMLine *line) +{ + while (line) { + ASMLine *temp = line->next; + free(line->data); + free_asm_lines(line->include); + free(line); + line = temp; + } +} + +/* + Deallocate an ASMInclude list. +*/ +static void free_asm_includes(ASMInclude *include) +{ + while (include) { + ASMInclude *temp = include->next; + free_line_buffer(include->lines); + free(include); + include = temp; + } +} + +/* + Deallocate an ASMInstruction list. +*/ +static void free_asm_instructions(ASMInstruction *inst) +{ + while (inst) { + ASMInstruction *temp = inst->next; + if (inst->symbol) + free(inst->symbol); + free(inst); + inst = temp; + } +} + +/* + Deallocate an ASMSymbolTable. +*/ +static void free_asm_symtable(ASMSymbolTable *symtable) +{ + if (!symtable) + return; + + for (size_t bucket = 0; bucket < SYMBOL_TABLE_BUCKETS; bucket++) { + ASMSymbol *sym = symtable->buckets[bucket], *temp; + while (sym) { + temp = sym->next; + free(sym->symbol); + free(sym); + sym = temp; + } + } + free(symtable); +} + +/* + Preprocess the LineBuffer into ASMLines. Change some state along the way. + + This function processes include directives, so read_source_file() may be + called multiple times (along with the implications that has), and + state->includes may be modified. + + On success, state->lines is modified and NULL is returned. On error, an + ErrorInfo object is returned, and state->lines and state->includes are not + modified. 
+*/ +static ErrorInfo* preprocess(AssemblerState *state, const LineBuffer *source) +{ + // TODO + + // state->header.offset <-- check in list of acceptable values + // state->header.checksum <-- boolean check + // state->header.product_code <-- range check + // state->header.version <-- range check + // state->header.region <-- string conversion, check + // state->header.rom_size <-- value/range check + // state->optimizer <-- boolean check + // state->rom_size <-- value check + + // if giving rom size, check header offset is in rom size range + // if giving reported and actual rom size, check reported is <= actual + // ensure no duplicate explicit assignments + + return NULL; +} + +/* + Tokenize ASMLines into ASMInstructions. + + On success, state->instructions is modified and NULL is returned. On error, + an ErrorInfo object is returned and state->instructions is not modified. + state->symtable may or may not be modified regardless of success. +*/ +static ErrorInfo* tokenize(AssemblerState *state) +{ + // TODO + + // verify no instructions clash with header offset + // if rom size is set, verify nothing overflows + + return NULL; +} + +/* + Resolve default placeholder values in assembler state, such as ROM size. + + On success, no new heap objects are allocated. On error, an ErrorInfo + object is returned. +*/ +static ErrorInfo* resolve_defaults(AssemblerState *state) +{ + // TODO + + // if (!state.rom_size) + // set to max possible >= 32 KB, or error if too many instructions + // if (state.header.rom_size) + // check reported rom size is <= actual rom size + + // if (!state.header.rom_size) + // set to actual rom size + + return NULL; +} + +/* + Resolve symbol placeholders in instructions such as jumps and branches. + + On success, no new heap objects are allocated. On error, an ErrorInfo + object is returned. 
+*/ +static ErrorInfo* resolve_symbols(AssemblerState *state) +{ + // TODO + + return NULL; +} + +/* + Convert finalized ASMInstructions into a binary data block. + + This function should never fail. +*/ +static void serialize_binary(AssemblerState *state, uint8_t *binary) +{ + // TODO + + for (size_t i = 0; i < state->rom_size; i++) + binary[i] = 'X'; +} + +/* Assemble the z80 source code in the source code buffer into binary data. If successful, return the size of the assembled binary data and change @@ -145,8 +404,47 @@ void error_info_destroy(ErrorInfo *error_info) */ size_t assemble(const LineBuffer *source, uint8_t **binary_ptr, ErrorInfo **ei_ptr) { - // TODO - return 0; + AssemblerState state; + ErrorInfo *error_info; + size_t retval = 0; + + init_state(&state); + + if ((error_info = preprocess(&state, source))) + goto error; + + if (!(state.symtable = malloc(sizeof(ASMSymbolTable)))) + OUT_OF_MEMORY() + for (size_t bucket = 0; bucket < SYMBOL_TABLE_BUCKETS; bucket++) + state.symtable->buckets[bucket] = NULL; + + if ((error_info = tokenize(&state))) + goto error; + + if ((error_info = resolve_defaults(&state))) + goto error; + + if ((error_info = resolve_symbols(&state))) + goto error; + + uint8_t *binary = malloc(sizeof(uint8_t) * state.rom_size); + if (!binary) + OUT_OF_MEMORY() + + serialize_binary(&state, binary); + *binary_ptr = binary; + retval = state.rom_size; + goto cleanup; + + error: + *ei_ptr = error_info; + + cleanup: + free_asm_lines(state.lines); + free_asm_includes(state.includes); + free_asm_instructions(state.instructions); + free_asm_symtable(state.symtable); + return retval; } /* @@ -165,16 +463,15 @@ bool assemble_file(const char *src_path, const char *dst_path) uint8_t *binary; ErrorInfo *error_info; size_t size = assemble(source, &binary, &error_info); + free_line_buffer(source); if (!size) { - error_info_print(error_info, stderr, source); + error_info_print(error_info, stderr); error_info_destroy(error_info); - 
free_line_buffer(source); return false; } bool success = write_binary_file(dst_path, binary, size); free(binary); - free_line_buffer(source); return success; } diff --git a/src/assembler.h b/src/assembler.h index 4c55e00..bce1d29 100644 --- a/src/assembler.h +++ b/src/assembler.h @@ -5,17 +5,21 @@ #include #include +#include /* Structs */ -typedef struct { +struct Line { char *data; size_t length; -} Line; + size_t lineno; + struct Line *next; +}; +typedef struct Line Line; typedef struct { Line *lines; - size_t length; + char *filename; } LineBuffer; typedef struct { @@ -24,7 +28,7 @@ typedef struct { /* Functions */ -void error_info_print(const ErrorInfo*, FILE*, const LineBuffer*); +void error_info_print(const ErrorInfo*, FILE*); void error_info_destroy(ErrorInfo*); size_t assemble(const LineBuffer*, uint8_t**, ErrorInfo**); bool assemble_file(const char*, const char*); diff --git a/src/rom.c b/src/rom.c index 8616e30..34fcbfe 100644 --- a/src/rom.c +++ b/src/rom.c @@ -206,10 +206,10 @@ static bool find_and_read_header(ROM *rom) const char* rom_open(ROM **rom_ptr, const char *path) { ROM *rom; - FILE* fp; + FILE *fp; struct stat st; - if (!(fp = fopen(path, "r"))) + if (!(fp = fopen(path, "rb"))) return strerror(errno); if (fstat(fileno(fp), &st)) { @@ -285,18 +285,8 @@ void rom_close(ROM *rom) Return the region this ROM was intended for, based on header information. NULL is returned if the region code is invalid. 
- - Region code information is taken from: - http://www.smspower.org/Development/ROMHeader */ const char* rom_region(const ROM *rom) { - switch (rom->region_code) { - case 3: return "SMS Japan"; - case 4: return "SMS Export"; - case 5: return "GG Japan"; - case 6: return "GG Export"; - case 7: return "GG International"; - default: return NULL; - } + return region_code_to_string(rom->region_code); } diff --git a/src/util.c b/src/util.c index e6294f2..c199bb1 100644 --- a/src/util.c +++ b/src/util.c @@ -1,6 +1,9 @@ /* Copyright (C) 2014-2015 Ben Kurtovic Released under the terms of the MIT License. See LICENSE for details. */ +#include +#include + #include "util.h" #if defined __APPLE__ @@ -41,3 +44,49 @@ uint64_t get_time_ns() return spec.tv_sec * NS_PER_SEC + spec.tv_nsec; #endif } + +/* + Return the name of the region encoded by the given region code. + + The given code should not be larger than one nibble. NULL is returned if + the region code is invalid. + + Region code information is taken from: + http://www.smspower.org/Development/ROMHeader +*/ +const char* region_code_to_string(uint8_t code) +{ + switch (code) { + case 3: return "SMS Japan"; + case 4: return "SMS Export"; + case 5: return "GG Japan"; + case 6: return "GG Export"; + case 7: return "GG International"; + default: return NULL; + } +} + +/* + Return the region code that encodes the given region name. + + 0 is returned if the name is not known. This is not a valid region code. 
/* Declarations exported through util.h for the two region helpers. */
const char* region_code_to_string(uint8_t);
uint8_t region_string_to_code(const char*);

/*
    Return the region code that encodes the given region name.

    The name must match one of the strings returned by
    region_code_to_string() exactly (for example "GG Export"); the comparison
    is case-sensitive and no prefix or partial match is accepted.

    0 is returned if the name is NULL or not known. This is not a valid
    region code.
*/
uint8_t region_string_to_code(const char *name)
{
    /* Inverse of the switch in region_code_to_string(); keep both in sync. */
    static const struct {
        const char *name;
        uint8_t code;
    } regions[] = {
        {"SMS Japan",        3},
        {"SMS Export",       4},
        {"GG Japan",         5},
        {"GG Export",        6},
        {"GG International", 7},
    };

    if (!name)
        return 0;

    for (size_t i = 0; i < sizeof regions / sizeof regions[0]; i++) {
        /* strcmp() returns 0 on equality, so test against 0 explicitly. */
        if (strcmp(name, regions[i].name) == 0)
            return regions[i].code;
    }
    return 0;
}