diff --git a/src/compiler/Compiler.h b/src/compiler/Compiler.h
new file mode 100644
index 0000000..6124102
--- /dev/null
+++ b/src/compiler/Compiler.h
@@ -0,0 +1,36 @@
+#pragma once
+#include "../std.h"
+#include "../collections/List.h"
+#include "Token.h"
+#include "AST.h"
+
+/// @brief stage the compiler is currently in
+typedef enum CompilerState {
+    CompilerState_Initial,
+    CompilerState_Lexing,
+    CompilerState_Parsing,
+    CompilerState_Compiling,
+    CompilerState_Error,
+    CompilerState_Success
+} CompilerState;
+
+/// @brief state shared by the compilation stages
+typedef struct Compiler {
+    char* code; // source code buffer; Token.begin and pos are indices in it
+    u32 code_len; // presumably the length of code -- TODO confirm
+    u32 column; // > 0 if code parsing started
+    u32 pos; // index in code; the parser sets it to the current token before reporting an error
+    CompilerState state;
+    NULLABLE(char* error_message);
+    List_Token tokens; // indexed by tok_i
+    List_u32 line_lengths;
+    AST ast; // the parser appends to ast.sections
+    u32 tok_i; // index in tokens of the token being parsed
+} Compiler;
+
+void Compiler_init(Compiler* cmp);
+void Compiler_free(Compiler* cmp);
+
+/// @brief compile assembly language code to machine code
+/// @return true if no errors, false if any error occurred (check cmp->error_message)
+bool Compiler_compile(Compiler* cmp, cstr source_file_name, cstr out_file_name, bool debug);
diff --git a/src/compiler/Compiler_internal.h b/src/compiler/Compiler_internal.h
new file mode 100644
index 0000000..f3ce11c
--- /dev/null
+++ b/src/compiler/Compiler_internal.h
@@ -0,0 +1,37 @@
+#pragma once
+#include "Compiler.h"
+
+/// @brief report a compilation error with a printf-style message
+/// @param context name of the reporting function (Compiler_setError passes __func__)
+void _Compiler_setError(Compiler* cmp, cstr context, cstr format, ...) __attribute__((__format__(__printf__, 3, 4)));
+
+#define Compiler_setError(cmp, format, ...) _Compiler_setError(cmp, __func__, format ,##__VA_ARGS__)
+
+/// Report an error and return false from the calling function.
+/// The including .c file must define its own setError(FORMAT, ...) macro.
+#define returnError(FORMAT, ...) {\
+    setError(FORMAT, ##__VA_ARGS__);\
+    return false;\
+}
+
+#define returnErrorIf(STATEMENT, FORMAT, ...) if(STATEMENT) returnError(FORMAT, ##__VA_ARGS__)
+
+/// The stringified condition is passed as the argument of "%s" and not as the format itself,
+/// so a '%' inside the condition is never treated as a conversion specifier.
+#define returnErrorIf_auto(STATEMENT) returnErrorIf(STATEMENT, "%s", #STATEMENT)
+
+typedef struct CodePos {
+    u32 line; // 0 on error
+    u32 column; // 0 on error
+} CodePos;
+
+#define CodePos_create(L, C) ((CodePos){ .line = (L), .column = (C) })
+
+/// @param pos index in code buffer
+CodePos Compiler_getLineAndColumn(Compiler* cmp, u32 pos);
+
+/// @return string that the caller must release with free()
+char* Compiler_extractTokenStr(Compiler* cmp, Token t);
+
+bool Compiler_lex(Compiler* cmp);
+bool Compiler_parse(Compiler* cmp);
diff --git a/src/compiler/Parser.c b/src/compiler/Parser.c
new file mode 100644
index 0000000..ebf366b
--- /dev/null
+++ b/src/compiler/Parser.c
@@ -0,0 +1,82 @@
+#include "Compiler_internal.h"
+
+// Point cmp->pos at the current token (if there is one) and report an error.
+// The bounds check is needed because an error can be raised while tok_i does
+// not index a token (e.g. the state check in Compiler_parse with no tokens).
+#define setError(FORMAT, ...) do {\
+    if(cmp->tok_i < cmp->tokens.len)\
+        cmp->pos = cmp->tokens.data[cmp->tok_i].begin;\
+    Compiler_setError(cmp, FORMAT, ##__VA_ARGS__);\
+} while(0)
+
+// Report token T as unexpected; the extracted token text is freed after use.
+#define setError_unexpectedToken(T) do {\
+    char* tok_str = Compiler_extractTokenStr(cmp, T);\
+    setError("unexpected character '%s'", tok_str);\
+    free(tok_str);\
+} while(0)
+
+
+// TODO: not implemented yet
+static void parseDataDefinition(Compiler* cmp, char* instr_name, DataDefinition* dataDefPtr){
+
+}
+
+
+// TODO: not implemented yet
+static void parseOperation(Compiler* cmp, char* instr_name, Operation* operPtr){
+
+}
+
+/// @brief parse cmp->tokens starting at cmp->tok_i and append the result to cmp->ast
+/// @return true if all tokens were parsed, false on error
+bool Compiler_parse(Compiler* cmp){
+    returnErrorIf_auto(cmp->state != CompilerState_Lexing);
+    cmp->state = CompilerState_Parsing;
+    Section* sec = NULL; // section created by the most recent label
+
+    while(cmp->tok_i < cmp->tokens.len){
+        Token tok = cmp->tokens.data[cmp->tok_i];
+        switch(tok.type){
+            case TokenType_Unset:
+                returnError("token of undefined type");
+            case TokenType_SingleLineComment:
+            case TokenType_MultiLineComment:
+                // skip comments
+                break;
+            case TokenType_Label:
+                // create new section
+                sec = List_Section_expand(&cmp->ast.sections);
+                Section_init(sec, Compiler_extractTokenStr(cmp, tok));
+                break;
+            case TokenType_Instruction: {
+                // braces are required: a declaration may not directly follow a case label before C23
+                // an instruction before the first label has no section to be stored in
+                returnErrorIf(sec == NULL, "instruction must be placed after a section label");
+                // NOTE(review): instr_name is heap-allocated and handed to the parse* helper -- confirm who frees it
+                char* instr_name = Compiler_extractTokenStr(cmp, tok);
+                // data definition starts with const
+                // NOTE(review): assumes cstr_seek returns non-zero on a match -- confirm against its definition
+                if(cstr_seek(instr_name, "const", 0, 1)){
+                    DataDefinition* dataDefPtr = List_DataDefinition_expand(&sec->data);
+                    parseDataDefinition(cmp, instr_name, dataDefPtr);
+                }
+                else {
+                    Operation* operPtr = List_Operation_expand(&sec->code);
+                    parseOperation(cmp, instr_name, operPtr);
+                }
+                break;
+            }
+            default:
+                setError_unexpectedToken(tok);
+                return false;
+        }
+
+        if(cmp->state == CompilerState_Error)
+            return false;
+
+        cmp->tok_i++;
+    }
+
+    return true;
+}
diff --git a/src/compiler/Token.c b/src/compiler/Token.c
new file mode 100644
index 0000000..cf62d87
--- /dev/null
+++ b/src/compiler/Token.c
@@ -0,0 +1,25 @@
+#include "Token.h"
+
+List_define(Token);
+
+// names of TokenType values, in enum declaration order
+static cstr const TokenType_names[] = {
+    "Unset",
+    "SingleLineComment",
+    "MultiLineComment",
+    "Instruction",
+    "Label",
+    "Number",
+    "Char",
+    "String",
+    "Name",
+    "NamedDataPointer",
+    "NamedDataSize"
+};
+
+/// @return name of the token type without the TokenType_ prefix, or "!!INDEX_ERROR!!" if t is out of range
+cstr TokenType_toString(TokenType t){
+    if((u32)t >= ARRAY_SIZE(TokenType_names))
+        return "!!INDEX_ERROR!!";
+    return TokenType_names[t];
+}
diff --git a/src/compiler/Token.h b/src/compiler/Token.h
new file mode 100644
index 0000000..2a4faad
--- /dev/null
+++ b/src/compiler/Token.h
@@ -0,0 +1,31 @@
+#pragma once
+#include "../std.h"
+#include "../collections/List.h"
+
+// keep in sync with the name table in Token.c
+typedef enum TokenType {
+    TokenType_Unset, // initial value
+    TokenType_SingleLineComment, // //comment
+    TokenType_MultiLineComment, // /* comment */
+    TokenType_Instruction, // abc
+    TokenType_Label, // .abc:
+    TokenType_Number, // 0123
+    TokenType_Char, // 'A'
+    TokenType_String, // "aaaa"
+    TokenType_Name, // xyz
+    TokenType_NamedDataPointer, // @xyz
+    TokenType_NamedDataSize // #xyz
+} TokenType;
+
+cstr TokenType_toString(TokenType t);
+
+typedef struct Token {
+    u32 begin; // some index in Compiler->code
+    u32 length : 24; // length in characters (24 bits)
+    TokenType type : 8; // type of token (8 bits)
+} Token;
+
+List_declare(Token);
+
+/// @param LENGTH token length in characters (stored in .length, it is not an end index)
+#define Token_construct(TYPE, BEGIN, LENGTH) ((Token){ .type = (TYPE), .begin = (BEGIN), .length = (LENGTH) })