diff --git a/src/collections/List.h b/src/collections/List.h index b5adcad..633f04b 100644 --- a/src/collections/List.h +++ b/src/collections/List.h @@ -13,7 +13,7 @@ }\ \ static inline List_##T List_##T##_alloc(u32 len){\ - return List_##T##_construct(len > 0 ? malloc(len * sizeof(T)) : NULL, 0, 0);\ + return List_##T##_construct((T*)(len > 0 ? malloc(len * sizeof(T)) : NULL), 0, 0);\ }\ \ void List_##T##_push(List_##T* ptr, T value); @@ -27,7 +27,7 @@ max_len += __List_padding_in_sizeof_T(T);\ /* branchless version of max(max_len, __List_min_size) */\ max_len += (max_len < __List_min_size) * (__List_min_size - max_len);\ - ptr->data = realloc(ptr->data, max_len * sizeof(T));\ + ptr->data = (T*)realloc(ptr->data, max_len * sizeof(T));\ ptr->max_len = max_len;\ }\ ptr->data[ptr->len++] = value;\ diff --git a/src/compiler/AST.h b/src/compiler/AST.h new file mode 100644 index 0000000..ad37846 --- /dev/null +++ b/src/compiler/AST.h @@ -0,0 +1,39 @@ +#pragma once +#include "../std.h" +#include "../instructions/instructions.h" +#include "../collections/List.h" + +typedef enum ArgumentType { + ArgumentType_NoArgument, + ArgumentType_Register, + ArgumentType_ConstValue, + ArgumentType_Name, + ArgumentType_NamedDataPointer, + ArgumentType_NamedDataSize, +} ArgumentType; + +typedef struct Argument { + ArgumentType type; + u32 value; +} Argument; + +typedef struct DataDefinition { + u8 element_size; + u16 size; + void* data; + cstr name; +} DataDefinition; + +List_declare(DataDefinition); + +typedef struct AST { + List_DataDefinition data_definitions; /* NOTE(review): member name assumed; the original line was an incomplete declaration */ +} AST; + +/* +d8 name '' + +goto label +.label: +code... 
+*/ \ No newline at end of file diff --git a/src/compiler/compiler.c b/src/compiler/compiler.c index c4130f9..79bb79a 100644 --- a/src/compiler/compiler.c +++ b/src/compiler/compiler.c @@ -1,8 +1,8 @@ -#include "compiler.h" +#include "Lexer.h" List_define(Token); -CodePos Compiler_getLineAndColumn(Compiler* cmp, u32 pos){ +CodePos Lexer_getLineAndColumn(Lexer* cmp, u32 pos){ u32 prev_lines_len = 0; if(pos >= cmp->code_len) return CodePos_create(0, 0); @@ -17,18 +17,18 @@ CodePos Compiler_getLineAndColumn(Compiler* cmp, u32 pos){ return CodePos_create(0, 0); } -static void completeLine(Compiler* cmp){ +static void completeLine(Lexer* cmp){ List_u32_push(&cmp->line_lengths, cmp->column); cmp->column = 0; } -void _Compiler_setError(Compiler* cmp, cstr context, cstr format, ...){ +void _Lexer_setError(Lexer* cmp, cstr context, cstr format, ...){ completeLine(cmp); // happens at the end of file if(cmp->pos >= cmp->code_len) cmp->pos = cmp->code_len - 1; char position_str[32]; - CodePos code_pos = Compiler_getLineAndColumn(cmp, cmp->pos); + CodePos code_pos = Lexer_getLineAndColumn(cmp, cmp->pos); sprintf(position_str, "[at %u:%u][", code_pos.line, code_pos.column); char* real_format = strcat_malloc(position_str, context, "] ", format); va_list argv; @@ -40,12 +40,12 @@ void _Compiler_setError(Compiler* cmp, cstr context, cstr format, ...){ buf = malloc(16); strcpy(buf, "SPRINTF FAILED"); } - cmp->state = CompilerState_Error; + cmp->state = LexerState_Error; cmp->error_message = buf; } -#define setError(FORMAT, ...) Compiler_setError(cmp, FORMAT, ##__VA_ARGS__) +#define setError(FORMAT, ...) Lexer_setError(cmp, FORMAT, ##__VA_ARGS__) #define returnError(FORMAT, ...) 
{\ setError(FORMAT, ##__VA_ARGS__);\ @@ -59,20 +59,20 @@ void _Compiler_setError(Compiler* cmp, cstr context, cstr format, ...){ #define Error_unexpectedCharacter(C) "unexpected character '%c'", C #define Error_endOfFile "unexpected end of file" -void Compiler_init(Compiler* cmp){ - memset(cmp, 0, sizeof(Compiler)); - cmp->state = CompilerState_Initial; +void Lexer_init(Lexer* cmp){ + memset(cmp, 0, sizeof(Lexer)); + cmp->state = LexerState_Initial; cmp->tokens = List_Token_alloc(4096); cmp->line_lengths = List_u32_alloc(1024); } -void Compiler_free(Compiler* cmp){ +void Lexer_free(Lexer* cmp){ free(cmp->code); free(cmp->tokens.data); free(cmp->line_lengths.data); } -static void readCommentSingleLine(Compiler* cmp){ +static void readCommentSingleLine(Lexer* cmp){ char c; // '/' Token tok = Token_construct(TokenType_SingleLineComment, cmp->pos - 1, 0); cmp->column++; @@ -97,7 +97,7 @@ static void readCommentSingleLine(Compiler* cmp){ List_Token_push(&cmp->tokens, tok); } -static void readCommentMultiLine(Compiler* cmp){ +static void readCommentMultiLine(Lexer* cmp){ char c; // '*' Token tok = Token_construct(TokenType_MultiLineComment, cmp->pos - 1, 0); cmp->column++; @@ -122,7 +122,7 @@ static void readCommentMultiLine(Compiler* cmp){ setError(Error_endOfFile); } -static void readComment(Compiler* cmp){ +static void readComment(Lexer* cmp){ char c; // '/' if(cmp->pos + 1 == cmp->code_len){ setError(Error_endOfFile); @@ -144,7 +144,7 @@ static void readComment(Compiler* cmp){ else setError(Error_unexpectedCharacter(c)); } -static void readLabel(Compiler* cmp){ +static void readLabel(Lexer* cmp){ char c; // '.' 
cmp->pos++; cmp->column++; @@ -179,7 +179,7 @@ static void readLabel(Compiler* cmp){ else setError(Error_endOfFile); } -static void readArguments(Compiler* cmp){ +static void readArguments(Lexer* cmp){ char c; // space cmp->pos++; cmp->column++; @@ -215,7 +215,7 @@ static void readArguments(Compiler* cmp){ List_Token_push(&cmp->tokens, tok); } -static void readInstruction(Compiler* cmp){ +static void readInstruction(Lexer* cmp){ Token tok = Token_construct(TokenType_Instruction, cmp->pos, 0); cmp->pos++; cmp->column++; @@ -252,7 +252,7 @@ static void readInstruction(Compiler* cmp){ List_Token_push(&cmp->tokens, tok); } -static void readChar(Compiler* cmp){ +static void readChar(Lexer* cmp){ Token tok = Token_construct(TokenType_Char, cmp->pos, 0); cmp->pos++; cmp->column++; @@ -279,7 +279,7 @@ static void readChar(Compiler* cmp){ setError(Error_endOfFile); } -static void readString(Compiler* cmp){ +static void readString(Lexer* cmp){ Token tok = Token_construct(TokenType_String, cmp->pos, 0); cmp->pos++; cmp->column++; @@ -306,7 +306,7 @@ static void readString(Compiler* cmp){ setError(Error_endOfFile); } -static void readNumber(Compiler* cmp){ +static void readNumber(Lexer* cmp){ Token tok = Token_construct(TokenType_Number, cmp->pos, 0); cmp->pos++; cmp->column++; @@ -329,9 +329,9 @@ static void readNumber(Compiler* cmp){ List_Token_push(&cmp->tokens, tok); } -static bool lex(Compiler* cmp){ - returnErrorIf_auto(cmp->state != CompilerState_Initial); - cmp->state = CompilerState_Lexing; +static bool lex(Lexer* cmp){ + returnErrorIf_auto(cmp->state != LexerState_Initial); + cmp->state = LexerState_Lexing; cmp->column = 1; while(cmp->pos < cmp->code_len){ @@ -364,7 +364,7 @@ static bool lex(Compiler* cmp){ break; } - if(cmp->state == CompilerState_Error) + if(cmp->state == LexerState_Error) return false; c = cmp->code[cmp->pos]; @@ -378,20 +378,20 @@ static bool lex(Compiler* cmp){ return true; } -static bool parse(Compiler* cmp){ - returnErrorIf_auto(cmp->state != 
CompilerState_Lexing); - cmp->state = CompilerState_Parsing; +static bool parse(Lexer* cmp){ + returnErrorIf_auto(cmp->state != LexerState_Lexing); + cmp->state = LexerState_Parsing; return true; } -static bool compile(Compiler* cmp, FILE* f){ - returnErrorIf_auto(cmp->state != CompilerState_Parsing); - cmp->state = CompilerState_Compiling; +static bool compile(Lexer* cmp, FILE* f){ + returnErrorIf_auto(cmp->state != LexerState_Parsing); + cmp->state = LexerState_Compiling; return true; } -bool Compiler_compileTasm(Compiler* cmp, cstr source_file_name, cstr out_file_name, bool debug){ +bool Lexer_compileTasm(Lexer* cmp, cstr source_file_name, cstr out_file_name, bool debug){ FILE* f = fopen(source_file_name, "rb"); if(f == NULL) returnError("ERROR: can't open file '%s'", source_file_name); @@ -439,7 +439,7 @@ bool Compiler_compileTasm(Compiler* cmp, cstr source_file_name, cstr out_file_na printf("------------------------------------[tokens]-----------------------------------\n"); for(u32 i = 0; i < cmp->tokens.len; i++){ Token t = cmp->tokens.data[i]; - CodePos pos = Compiler_getLineAndColumn(cmp, t.begin); + CodePos pos = Lexer_getLineAndColumn(cmp, t.begin); char* tokstr = malloc(4096); strncpy(tokstr, cmp->code + t.begin, t.length); tokstr[t.length] = 0; @@ -463,7 +463,7 @@ bool Compiler_compileTasm(Compiler* cmp, cstr source_file_name, cstr out_file_na success = compile(cmp, f); fclose(f); if(success){ - cmp->state = CompilerState_Success; + cmp->state = LexerState_Success; } return success; diff --git a/src/compiler/compiler.h b/src/compiler/compiler.h index 3de4227..5390407 100644 --- a/src/compiler/compiler.h +++ b/src/compiler/compiler.h @@ -2,38 +2,39 @@ #include "../std.h" #include "../collections/List.h" #include "token.h" +#include "AST.h" List_declare(Token); -typedef enum CompilerState { - CompilerState_Initial, - CompilerState_Lexing, - CompilerState_Parsing, - CompilerState_Compiling, - CompilerState_Error, - CompilerState_Success -} CompilerState; 
+typedef enum LexerState { + LexerState_Initial, + LexerState_Lexing, + LexerState_Parsing, + LexerState_Compiling, + LexerState_Error, + LexerState_Success +} LexerState; -typedef struct Compiler { +typedef struct Lexer { char* code; u32 code_len; u32 column; // > 0 if code parsing started u32 pos; - CompilerState state; + LexerState state; NULLABLE(char* error_message); List_Token tokens; List_u32 line_lengths; -} Compiler; +} Lexer; -void Compiler_init(Compiler* cmp); -void Compiler_free(Compiler* cmp); +void Lexer_init(Lexer* cmp); +void Lexer_free(Lexer* cmp); /// @brief compile assembly language code to machine code /// @return true if no errors, false if any error occured (check cmp->error_message) -bool Compiler_compileTasm(Compiler* cmp, cstr source_file_name, cstr out_file_name, bool debug); +bool Lexer_compileTasm(Lexer* cmp, cstr source_file_name, cstr out_file_name, bool debug); -#define Compiler_setError(cmp, format, ...) _Compiler_setError(cmp, __func__, format ,##__VA_ARGS__) -void _Compiler_setError(Compiler* cmp, cstr context, cstr format, ...) __attribute__((__format__(__printf__, 3, 4))); +#define Lexer_setError(cmp, format, ...) _Lexer_setError(cmp, __func__, format ,##__VA_ARGS__) +void _Lexer_setError(Lexer* cmp, cstr context, cstr format, ...) 
__attribute__((__format__(__printf__, 3, 4))); typedef struct CodePos { u32 line; // 0 on error @@ -43,5 +44,5 @@ typedef struct CodePos { #define CodePos_create(L, C) ((CodePos){ .line = L, .column = C }) /// @param pos index in code buffer -CodePos Compiler_getLineAndColumn(Compiler* cmp, u32 pos); +CodePos Lexer_getLineAndColumn(Lexer* cmp, u32 pos); diff --git a/src/compiler/compiler_internal.h b/src/compiler/compiler_internal.h new file mode 100644 index 0000000..e69de29 diff --git a/src/compiler/parser.c b/src/compiler/parser.c new file mode 100644 index 0000000..5de8c9d --- /dev/null +++ b/src/compiler/parser.c @@ -0,0 +1,3 @@ +#include "compiler.h" + +List_define(DataDefinition); diff --git a/src/compiler/token.c b/src/compiler/token.c index 29ffecd..e5f7961 100644 --- a/src/compiler/token.c +++ b/src/compiler/token.c @@ -5,15 +5,19 @@ static cstr _TokenType_str[] = { "SingleLineComment", "MultiLineComment", "Label", - "Instruction", - "Argument", + "DataDefinition", "Number", "Char", "String", + "Instruction", + "Register", + "DataType", + "DataPointer", + "DataSize" }; cstr TokenType_toString(TokenType t){ - if(t >= sizeof(_TokenType_str) / sizeof(cstr)) + if(t >= ARRAY_SIZE(_TokenType_str)) return "!!INDEX_ERROR!!"; return _TokenType_str[t]; } diff --git a/src/compiler/token.h b/src/compiler/token.h index 8e3d744..a5fee2c 100644 --- a/src/compiler/token.h +++ b/src/compiler/token.h @@ -6,11 +6,15 @@ typedef enum TokenType { TokenType_SingleLineComment, TokenType_MultiLineComment, TokenType_Label, - TokenType_Instruction, - TokenType_Argument, + TokenType_DataDefinition, TokenType_Number, TokenType_Char, TokenType_String, + TokenType_Instruction, + TokenType_Register, + TokenType_DataType, + TokenType_DataPointer, + TokenType_DataSize } TokenType; cstr TokenType_toString(TokenType t); diff --git a/src/instructions/instructions.c b/src/instructions/instructions.c index 9b96dbc..1431253 100644 --- a/src/instructions/instructions.c +++ 
b/src/instructions/instructions.c @@ -1,5 +1,19 @@ #include "instructions.h" +i32 NOP_impl(VM* vm); +i32 PUSH_impl(VM* vm); +i32 MOV_impl(VM* vm); +i32 ADD_impl(VM* vm); +i32 SUB_impl(VM* vm); +i32 MUL_impl(VM* vm); +i32 DIV_impl(VM* vm); +i32 MOD_impl(VM* vm); +i32 SYS_impl(VM* vm); +i32 EXIT_impl(VM* vm); +i32 JMP_impl(VM* vm); +i32 CALL_impl(VM* vm); + + const Instruction instructions[] = { Instruction_construct(NOP), Instruction_construct(PUSH), @@ -14,10 +28,9 @@ const Instruction instructions[] = { // Instruction_construct(JMP), // Instruction_construct(CALL), }; -const size_t instructions_count = sizeof(instructions)/sizeof(instructions[0]); -const Instruction* NULLABLE(Instruction_getFromOpcode)(u8 opcode){ - if(opcode >= instructions_count) +const Instruction* Instruction_getFromOpcode(Opcode opcode){ + if(opcode >= ARRAY_SIZE(instructions)) return NULL; return instructions + opcode; diff --git a/src/instructions/instructions.h b/src/instructions/instructions.h index ba670b2..7f06caa 100644 --- a/src/instructions/instructions.h +++ b/src/instructions/instructions.h @@ -5,30 +5,32 @@ ///@returns number of bytes read typedef i32 (*InstructionImplFunc_t)(VM* vm); +typedef enum __attribute__((__packed__)) Opcode { + Opcode_NOP, + Opcode_PUSH, + Opcode_MOV, + Opcode_ADD, + Opcode_SUB, + Opcode_MUL, + Opcode_DIV, + Opcode_MOD, + Opcode_SYS, + Opcode_EXIT, +} Opcode; + typedef struct Instruction { cstr name; InstructionImplFunc_t implementation; + Opcode opcode; } Instruction; #define Instruction_construct(NAME) {\ .name = #NAME, \ - .implementation = NAME##_impl \ + .implementation = NAME##_impl, \ + .opcode = Opcode_##NAME\ } /// @brief get instruction info from table /// @param opcode any byte /// @return ptr to struct or NULL -const Instruction* NULLABLE(Instruction_getFromOpcode)(u8 opcode); - -i32 NOP_impl(VM* vm); -i32 PUSH_impl(VM* vm); -i32 MOV_impl(VM* vm); -i32 ADD_impl(VM* vm); -i32 SUB_impl(VM* vm); -i32 MUL_impl(VM* vm); -i32 DIV_impl(VM* vm); -i32 
MOD_impl(VM* vm); -i32 SYS_impl(VM* vm); -i32 EXIT_impl(VM* vm); -i32 JMP_impl(VM* vm); -i32 CALL_impl(VM* vm); +const Instruction* NULLABLE(Instruction_getFromOpcode)(Opcode opcode); diff --git a/src/std.h b/src/std.h index dbdb070..bb63bfa 100644 --- a/src/std.h +++ b/src/std.h @@ -26,6 +26,8 @@ typedef u8 bool; typedef const char* cstr; +#define ARRAY_SIZE(A) (sizeof(A)/sizeof((A)[0])) /* element count; valid only for true arrays, not pointers */ + #define __count_args( \ a0, a1, a2, a3, a4, a5, a6, a7 , a8, a9, a10,a11,a12,a13,a14,a15, \ a16,a17,a18,a19,a20,a21,a22,a23, a24,a25,a26,a27,a28,a29,a30,a31, \