diff --git a/src/VM/VM.c b/src/VM/VM.c index b505cf2..1c9105e 100644 --- a/src/VM/VM.c +++ b/src/VM/VM.c @@ -49,7 +49,7 @@ i32 VM_boot(VM* vm){ while (vm->current_pos < vm->data_size){ u8 opcode = vm->data[vm->current_pos]; - const Instruction* instr = Instruction_getFromOpcode(opcode); + const Instruction* instr = Instruction_getByOpcode(opcode); // printfe("[at 0x%x] %02X %s\n", (u32)vm->current_pos, opcode, instr->name); if(instr == NULL){ VM_setError(vm, "unknown opcode %02X", opcode); diff --git a/src/compiler/AST.c b/src/compiler/AST.c new file mode 100644 index 0000000..6a7ebf3 --- /dev/null +++ b/src/compiler/AST.c @@ -0,0 +1,5 @@ +#include "AST.h" + +List_define(Argument); +List_define(Operation); +List_define(DataDefinition); \ No newline at end of file diff --git a/src/compiler/AST.h b/src/compiler/AST.h index ad37846..74b75cb 100644 --- a/src/compiler/AST.h +++ b/src/compiler/AST.h @@ -7,27 +7,37 @@ typedef enum ArgumentType { ArgumentType_NoArgument, ArgumentType_Register, ArgumentType_ConstValue, - ArgumentType_Name, + ArgumentType_DataName, ArgumentType_NamedDataPointer, ArgumentType_NamedDataSize, -}; +} ArgumentType; typedef struct Argument { ArgumentType type; u32 value; } Argument; +List_declare(Argument); + +typedef struct Operation { + Opcode op; + List_Argument args; +} Operation; + +List_declare(Operation); + typedef struct DataDefinition { u8 element_size; - u16 size; - void* data; + u16 count; cstr name; -}; + void* data; +} DataDefinition; List_declare(DataDefinition); typedef struct AST { - DataDefinition + List_DataDefinition data; + List_Operation operations; } AST; /* diff --git a/src/compiler/compiler.c b/src/compiler/compiler.c index 79bb79a..427cc8f 100644 --- a/src/compiler/compiler.c +++ b/src/compiler/compiler.c @@ -1,78 +1,19 @@ -#include "Lexer.h" +#include "Compiler_internal.h" -List_define(Token); - -CodePos Lexer_getLineAndColumn(Lexer* cmp, u32 pos){ - u32 prev_lines_len = 0; - if(pos >= cmp->code_len) - return CodePos_create(0, 0); - - for(u32 i = 0; i < cmp->line_lengths.len; i++){ - u32 line_len = cmp->line_lengths.data[i]; - if(prev_lines_len + line_len > pos) - return CodePos_create(i + 1, pos + 1 - prev_lines_len); - prev_lines_len += line_len; - } - - return CodePos_create(0, 0); +#define setError(FORMAT, ...) {\ + completeLine(cmp);\ + Compiler_setError(cmp, FORMAT, ##__VA_ARGS__);\ } -static void completeLine(Lexer* cmp){ - List_u32_push(&cmp->line_lengths, cmp->column); - cmp->column = 0; -} - -void _Lexer_setError(Lexer* cmp, cstr context, cstr format, ...){ - completeLine(cmp); - // happens at the end of file - if(cmp->pos >= cmp->code_len) - cmp->pos = cmp->code_len - 1; - char position_str[32]; - CodePos code_pos = Lexer_getLineAndColumn(cmp, cmp->pos); - sprintf(position_str, "[at %u:%u][", code_pos.line, code_pos.column); - char* real_format = strcat_malloc(position_str, context, "] ", format); - va_list argv; - va_start(argv, format); - char* NULLABLE(buf) = vsprintf_malloc(512, real_format, argv); - va_end(argv); - free(real_format); - if(buf == NULL){ - buf = malloc(16); - strcpy(buf, "SPRINTF FAILED"); - } - cmp->state = LexerState_Error; - cmp->error_message = buf; -} - - -#define setError(FORMAT, ...) Lexer_setError(cmp, FORMAT, ##__VA_ARGS__) - -#define returnError(FORMAT, ...) {\ - setError(FORMAT, ##__VA_ARGS__);\ - return false;\ -} - -#define returnErrorIf(STATEMENT, FORMAT, ...) if(STATEMENT) returnError(FORMAT, ##__VA_ARGS__) - -#define returnErrorIf_auto(STATEMENT) returnErrorIf(STATEMENT, #STATEMENT) - #define Error_unexpectedCharacter(C) "unexpected character '%c'", C #define Error_endOfFile "unexpected end of file" -void Lexer_init(Lexer* cmp){ - memset(cmp, 0, sizeof(Lexer)); - cmp->state = LexerState_Initial; - cmp->tokens = List_Token_alloc(4096); - cmp->line_lengths = List_u32_alloc(1024); +static void completeLine(Compiler* cmp){ + List_u32_push(&cmp->line_lengths, cmp->column); + cmp->column = 0; } -void Lexer_free(Lexer* cmp){ - free(cmp->code); - free(cmp->tokens.data); - free(cmp->line_lengths.data); -} - -static void readCommentSingleLine(Lexer* cmp){ +static void readCommentSingleLine(Compiler* cmp){ char c; // '/' Token tok = Token_construct(TokenType_SingleLineComment, cmp->pos - 1, 0); cmp->column++; @@ -97,7 +38,7 @@ static void readCommentSingleLine(Lexer* cmp){ List_Token_push(&cmp->tokens, tok); } -static void readCommentMultiLine(Lexer* cmp){ +static void readCommentMultiLine(Compiler* cmp){ char c; // '*' Token tok = Token_construct(TokenType_MultiLineComment, cmp->pos - 1, 0); cmp->column++; @@ -122,7 +63,7 @@ static void readCommentMultiLine(Lexer* cmp){ setError(Error_endOfFile); } -static void readComment(Lexer* cmp){ +static void readComment(Compiler* cmp){ char c; // '/' if(cmp->pos + 1 == cmp->code_len){ setError(Error_endOfFile); @@ -144,7 +85,7 @@ static void readComment(Lexer* cmp){ else setError(Error_unexpectedCharacter(c)); } -static void readLabel(Lexer* cmp){ +static void readLabel(Compiler* cmp){ char c; // '.' cmp->pos++; cmp->column++; @@ -179,16 +120,27 @@ static void readLabel(Lexer* cmp){ else setError(Error_endOfFile); } -static void readArguments(Lexer* cmp){ +static void readArguments(Compiler* cmp){ char c; // space - cmp->pos++; - cmp->column++; - Token tok = Token_construct(TokenType_Argument, cmp->pos, 0); - + Token tok = Token_construct(TokenType_Unset, cmp->pos, 0); + char quot = '\0'; // quotation character of a string value + while(cmp->pos < cmp->code_len){ c = cmp->code[cmp->pos]; + + // string argument reading + if(quot != '\0'){ + if(c == quot && cmp->code[cmp->pos - 1] != '\\'){ + quot = '\0'; + } + else if(c == '\r' || c == '\n'){ + setError("line end reached but string hasn't been closed yet"); + return; + } + } + // end of line - if(c == '\r' || c == '\n' || c == ';'){ + else if(c == '\r' || c == '\n' || c == ';'){ tok.length = cmp->pos - tok.begin; if(tok.length > 0) List_Token_push(&cmp->tokens, tok); @@ -197,13 +149,30 @@ static void readArguments(Lexer* cmp){ } // new argument begins - if(c == ' ' || c == '\t'){ + else if(c == ' ' || c == '\t'){ tok.length = cmp->pos - tok.begin; if(tok.length > 0) List_Token_push(&cmp->tokens, tok); - tok.begin = cmp->pos + 1; + tok = Token_construct(TokenType_Unset, cmp->pos + 1, 0); + } + + else if(tok.type == TokenType_Unset){ + if(c == '\''){ + tok.type = TokenType_Char; + quot = c; + } + else if(c == '"'){ + tok.type = TokenType_String; + quot = c; + } + else if(c == '@') + tok.type = TokenType_NamedDataPointer; + else if(c == '#') + tok.type = TokenType_NamedDataSize; + else if(isDigit(c)) + tok.type = TokenType_Number; + else tok.type = TokenType_Name; } - cmp->column++; cmp->pos++; @@ -215,7 +184,7 @@ static void readArguments(Lexer* cmp){ List_Token_push(&cmp->tokens, tok); } -static void readInstruction(Lexer* cmp){ +static void readInstruction(Compiler* cmp){ Token tok = Token_construct(TokenType_Instruction, cmp->pos, 0); cmp->pos++; cmp->column++; @@ -252,86 +221,9 @@ static void readInstruction(Lexer* cmp){ List_Token_push(&cmp->tokens, tok); } -static void readChar(Lexer* cmp){ - Token tok = Token_construct(TokenType_Char, cmp->pos, 0); - cmp->pos++; - cmp->column++; - - while(cmp->pos < cmp->code_len){ - char c = cmp->code[cmp->pos]; - // end of line - if(c == '\r' || c == '\n'){ - setError(Error_unexpectedCharacter(cmp->code[--cmp->pos])); - return; - } - - if(c == '\''){ - tok.length = cmp->pos - tok.begin + 1; - List_Token_push(&cmp->tokens, tok); - return; - } - - cmp->column++; - cmp->pos++; - } - - // end of file - setError(Error_endOfFile); -} - -static void readString(Lexer* cmp){ - Token tok = Token_construct(TokenType_String, cmp->pos, 0); - cmp->pos++; - cmp->column++; - - while(cmp->pos < cmp->code_len){ - char c = cmp->code[cmp->pos]; - // end of line - if(c == '\r' || c == '\n'){ - setError(Error_unexpectedCharacter(cmp->code[--cmp->pos])); - return; - } - - if(c == '"'){ - tok.length = cmp->pos - tok.begin + 1; - List_Token_push(&cmp->tokens, tok); - return; - } - - cmp->column++; - cmp->pos++; - } - - // end of file - setError(Error_endOfFile); -} - -static void readNumber(Lexer* cmp){ - Token tok = Token_construct(TokenType_Number, cmp->pos, 0); - cmp->pos++; - cmp->column++; - - while(cmp->pos < cmp->code_len){ - char c = cmp->code[cmp->pos]; - - if(c == '\r' || c == '\n' || c == ' ' || c == '\t' || c == ',' || c == ';'){ - tok.length = cmp->pos - tok.begin; - List_Token_push(&cmp->tokens, tok); - return; - } - - cmp->column++; - cmp->pos++; - } - - // end of file - tok.length = cmp->pos - tok.begin; - List_Token_push(&cmp->tokens, tok); -} - -static bool lex(Lexer* cmp){ - returnErrorIf_auto(cmp->state != LexerState_Initial); - cmp->state = LexerState_Lexing; +bool Compiler_lex(Compiler* cmp){ + returnErrorIf_auto(cmp->state != CompilerState_Initial); + cmp->state = CompilerState_Lexing; cmp->column = 1; while(cmp->pos < cmp->code_len){ @@ -348,23 +240,15 @@ static bool lex(Lexer* cmp){ case '.': readLabel(cmp); break; - case '"': - readString(cmp); - break; - case '\'': - readChar(cmp); - break; default: // try read instruction if(isAlphabeticalLower(c) || isAlphabeticalUpper(c)) readInstruction(cmp); - else if(isDigit(c)) - readNumber(cmp); else returnError(Error_unexpectedCharacter(c)); break; } - if(cmp->state == LexerState_Error) + if(cmp->state == CompilerState_Error) return false; c = cmp->code[cmp->pos]; @@ -377,94 +261,3 @@ static bool lex(Lexer* cmp){ completeLine(cmp); return true; } - -static bool parse(Lexer* cmp){ - returnErrorIf_auto(cmp->state != LexerState_Lexing); - cmp->state = LexerState_Parsing; - - return true; -} -static bool compile(Lexer* cmp, FILE* f){ - returnErrorIf_auto(cmp->state != LexerState_Parsing); - cmp->state = LexerState_Compiling; - - return true; -} - -bool Lexer_compileTasm(Lexer* cmp, cstr source_file_name, cstr out_file_name, bool debug){ - FILE* f = fopen(source_file_name, "rb"); - if(f == NULL) - returnError("ERROR: can't open file '%s'", source_file_name); - - List_u8 buf = List_u8_alloc(64 * 1024); - int ret; - while((ret = fgetc(f)) != EOF) { - List_u8_push(&buf, ret); - } - if(ferror(f)){ - free(buf.data); - fclose(f); - returnError("can't read file '%s'", source_file_name); - } - fclose(f); - - if(buf.len == 0){ - free(buf.data); - fclose(f); - returnError("soucre file is empty"); - } - - cmp->code = (char*)buf.data; - cmp->code_len = buf.len; - List_u8_push(&buf, 0); - - f = fopen(out_file_name, "wb"); - if(f == NULL){ - free(buf.data); - returnError("ERROR: can't open file '%s'", out_file_name); - } - - if(debug){ - printf("----------------------------------[%s]---------------------------------\n", source_file_name); - fputs(cmp->code, stdout); - fputc('\n', stdout); - } - - bool success = lex(cmp); - if(debug){ - printf("------------------------------------[lines]-----------------------------------\n"); - for(u32 i = 0; i < cmp->line_lengths.len; i++){ - printf("[%u] length: %u\n", i+1, cmp->line_lengths.data[i]); - } - printf("------------------------------------[tokens]-----------------------------------\n"); - for(u32 i = 0; i < cmp->tokens.len; i++){ - Token t = cmp->tokens.data[i]; - CodePos pos = Lexer_getLineAndColumn(cmp, t.begin); - char* tokstr = malloc(4096); - strncpy(tokstr, cmp->code + t.begin, t.length); - tokstr[t.length] = 0; - printf("[l:%3u, c:%3u] %s '%s'\n", - pos.line, pos.column, - TokenType_toString(t.type), tokstr); - free(tokstr); - } - } - if(!success){ - fclose(f); - return false; - } - - success = parse(cmp); - if(!success){ - fclose(f); - return false; - } - - success = compile(cmp, f); - fclose(f); - if(success){ - cmp->state = LexerState_Success; - } - - return success; -} diff --git a/src/compiler/compiler.h b/src/compiler/compiler.h index 5390407..cb5485b 100644 --- a/src/compiler/compiler.h +++ b/src/compiler/compiler.h @@ -1,48 +1,32 @@ #pragma once #include "../std.h" #include "../collections/List.h" -#include "token.h" +#include "Token.h" #include "AST.h" -List_declare(Token); +typedef enum CompilerState { + CompilerState_Initial, + CompilerState_Lexing, + CompilerState_Parsing, + CompilerState_Compiling, + CompilerState_Error, + CompilerState_Success +} CompilerState; -typedef enum LexerState { - LexerState_Initial, - LexerState_Lexing, - LexerState_Parsing, - LexerState_Compiling, - LexerState_Error, - LexerState_Success -} LexerState; - -typedef struct Lexer { +typedef struct Compiler { char* code; u32 code_len; u32 column; // > 0 if code parsing started u32 pos; - LexerState state; + CompilerState state; NULLABLE(char* error_message); List_Token tokens; List_u32 line_lengths; -} Lexer; +} Compiler; -void Lexer_init(Lexer* cmp); -void Lexer_free(Lexer* cmp); +void Compiler_init(Compiler* cmp); +void Compiler_free(Compiler* cmp); /// @brief compile assembly language code to machine code /// @return true if no errors, false if any error occured (check cmp->error_message) -bool Lexer_compileTasm(Lexer* cmp, cstr source_file_name, cstr out_file_name, bool debug); - -#define Lexer_setError(cmp, format, ...) _Lexer_setError(cmp, __func__, format ,##__VA_ARGS__) -void _Lexer_setError(Lexer* cmp, cstr context, cstr format, ...) __attribute__((__format__(__printf__, 3, 4))); - -typedef struct CodePos { - u32 line; // 0 on error - u32 column; // 0 on error -} CodePos; - -#define CodePos_create(L, C) ((CodePos){ .line = L, .column = C }) - -/// @param pos index in code buffer -CodePos Lexer_getLineAndColumn(Lexer* cmp, u32 pos); - +bool Compiler_compile(Compiler* cmp, cstr source_file_name, cstr out_file_name, bool debug); diff --git a/src/compiler/compiler_internal.h b/src/compiler/compiler_internal.h index e69de29..4e79443 100644 --- a/src/compiler/compiler_internal.h +++ b/src/compiler/compiler_internal.h @@ -0,0 +1,27 @@ +#include "Compiler.h" + +void _Compiler_setError(Compiler* cmp, cstr context, cstr format, ...) __attribute__((__format__(__printf__, 3, 4))); + +#define Compiler_setError(cmp, format, ...) _Compiler_setError(cmp, __func__, format ,##__VA_ARGS__) + +#define returnError(FORMAT, ...) {\ + setError(FORMAT, ##__VA_ARGS__);\ + return false;\ +} + +#define returnErrorIf(STATEMENT, FORMAT, ...) if(STATEMENT) returnError(FORMAT, ##__VA_ARGS__) + +#define returnErrorIf_auto(STATEMENT) returnErrorIf(STATEMENT, #STATEMENT) + +typedef struct CodePos { + u32 line; // 0 on error + u32 column; // 0 on error +} CodePos; + +#define CodePos_create(L, C) ((CodePos){ .line = L, .column = C }) + +/// @param pos index in code buffer +CodePos Compiler_getLineAndColumn(Compiler* cmp, u32 pos); + +bool Compiler_lex(Compiler* cmp); +bool Compiler_parse(Compiler* cmp); diff --git a/src/compiler/parser.c b/src/compiler/parser.c index 5de8c9d..12b2fa6 100644 --- a/src/compiler/parser.c +++ b/src/compiler/parser.c @@ -1,3 +1,12 @@ -#include "compiler.h" +#include "Compiler_internal.h" -List_define(DataDefinition); +#define setError(FORMAT, ...) {\ + Compiler_setError(cmp, FORMAT, ##__VA_ARGS__);\ +} + +bool Compiler_parse(Compiler* cmp){ + returnErrorIf_auto(cmp->state != CompilerState_Lexing); + cmp->state = CompilerState_Parsing; + + return true; +} diff --git a/src/compiler/token.c b/src/compiler/token.c index e5f7961..cf62d87 100644 --- a/src/compiler/token.c +++ b/src/compiler/token.c @@ -1,19 +1,19 @@ -#include "token.h" +#include "Token.h" + +List_define(Token); static cstr _TokenType_str[] = { "Unset", "SingleLineComment", "MultiLineComment", + "Instruction", "Label", - "DataDefinition", "Number", "Char", "String", - "Instruction", - "Register", - "DataType", - "DataPointer", - "DataSize" + "Name", + "NamedDataPointer", + "NamedDataSize" }; cstr TokenType_toString(TokenType t){ diff --git a/src/compiler/token.h b/src/compiler/token.h index a5fee2c..2a4faad 100644 --- a/src/compiler/token.h +++ b/src/compiler/token.h @@ -1,20 +1,19 @@ #pragma once #include "../std.h" +#include "../collections/List.h" typedef enum TokenType { - TokenType_Unset, - TokenType_SingleLineComment, - TokenType_MultiLineComment, - TokenType_Label, - TOkenType_DataDefinition, - TokenType_Number, - TokenType_Char, - TokenType_String, - TokenType_Instruction, - TokenType_Register, - TokenType_DataType, - TokenType_DataPointer, - TokenType_DataSize + TokenType_Unset, // initial value + TokenType_SingleLineComment, // //comment + TokenType_MultiLineComment, // /* comment */ + TokenType_Instruction, // abc + TokenType_Label, // .abc: + TokenType_Number, // 0123 + TokenType_Char, // 'A' + TokenType_String, // "aaaa" + TokenType_Name, // xyz + TokenType_NamedDataPointer, // @xyz + TokenType_NamedDataSize // #xyz } TokenType; cstr TokenType_toString(TokenType t); @@ -25,4 +24,6 @@ typedef struct Token { TokenType type : 8; // type of token (8 bits) } Token; +List_declare(Token); + #define Token_construct(TYPE, BEGIN, END) ((Token){ .type = TYPE, .begin = BEGIN, .length = END }) diff --git a/src/instructions/instructions.c b/src/instructions/instructions.c index 1431253..f6ccedc 100644 --- a/src/instructions/instructions.c +++ b/src/instructions/instructions.c @@ -29,7 +29,7 @@ const Instruction instructions[] = { // Instruction_construct(CALL), }; -const Instruction* Instruction_getFromOpcode(Opcode opcode){ +const Instruction* Instruction_getByOpcode(Opcode opcode){ if(opcode >= ARRAY_SIZE(instructions)) return NULL; diff --git a/src/instructions/instructions.h b/src/instructions/instructions.h index 7f06caa..1c8f054 100644 --- a/src/instructions/instructions.h +++ b/src/instructions/instructions.h @@ -33,4 +33,4 @@ typedef struct Instruction { /// @brief get instruction info from table /// @param opcode any byte /// @return ptr to struct or NULL -const Instruction* NULLABLE(Instruction_getFromOpcode)(Opcode opcode); +const Instruction* NULLABLE(Instruction_getByOpcode)(Opcode opcode); diff --git a/src/main.c b/src/main.c index 65382cd..8393dfc 100644 --- a/src/main.c +++ b/src/main.c @@ -33,7 +33,7 @@ i32 main(const i32 argc, cstr* argv){ } else if(arg_is("-op") || arg_is("--opcodes")){ for(u8 opcode = 0; opcode < 255; opcode++){ - const Instruction* instr = Instruction_getFromOpcode(opcode); + const Instruction* instr = Instruction_getByOpcode(opcode); if(instr != NULL){ printf("%02X %s\n", opcode, instr->name); } @@ -134,7 +134,7 @@ i32 bootFromImage(cstr image_file){ i32 compileSources(cstr source_file, cstr out_file){ Compiler cmp; Compiler_init(&cmp); - bool success = Compiler_compileTasm(&cmp, source_file, out_file, true); + bool success = Compiler_compile(&cmp, source_file, out_file, true); Compiler_free(&cmp); if(!success){ if(cmp.error_message){