This commit is contained in:
Timerix 2025-01-18 13:56:46 +05:00
parent bd8215fd73
commit dbe8569a3b
11 changed files with 143 additions and 75 deletions

View File

@ -13,7 +13,7 @@
}\ }\
\ \
static inline List_##T List_##T##_alloc(u32 len){\ static inline List_##T List_##T##_alloc(u32 len){\
return List_##T##_construct(len > 0 ? malloc(len * sizeof(T)) : NULL, 0, 0);\ return List_##T##_construct((T*)(len > 0 ? malloc(len * sizeof(T)) : NULL), 0, 0);\
}\ }\
\ \
void List_##T##_push(List_##T* ptr, T value); void List_##T##_push(List_##T* ptr, T value);
@ -27,7 +27,7 @@
max_len += __List_padding_in_sizeof_T(T);\ max_len += __List_padding_in_sizeof_T(T);\
/* branchless version of max(max_len, __List_min_size) */\ /* branchless version of max(max_len, __List_min_size) */\
max_len += (max_len < __List_min_size) * (__List_min_size - max_len);\ max_len += (max_len < __List_min_size) * (__List_min_size - max_len);\
ptr->data = realloc(ptr->data, max_len * sizeof(T));\ ptr->data = (T*)realloc(ptr->data, max_len * sizeof(T));\
ptr->max_len = max_len;\ ptr->max_len = max_len;\
}\ }\
ptr->data[ptr->len++] = value;\ ptr->data[ptr->len++] = value;\

39
src/compiler/AST.h Normal file
View File

@ -0,0 +1,39 @@
#pragma once
#include "../std.h"
#include "../instructions/instructions.h"
#include "../collections/List.h"
/// Kind of operand stored in an `Argument`.
/// NOTE(review): the exact meaning of each kind is inferred from its name only - confirm
/// against the parser once it exists.
typedef enum ArgumentType {
    ArgumentType_NoArgument,
    ArgumentType_Register,
    ArgumentType_ConstValue,
    ArgumentType_Name,
    ArgumentType_NamedDataPointer,
    ArgumentType_NamedDataSize,
} ArgumentType; // the typedef name is required: C code below uses the bare `ArgumentType`
/// One instruction operand: a kind tag plus a `u32` payload.
/// NOTE(review): how `value` is interpreted for each `type` is not visible in this header - confirm.
typedef struct Argument {
ArgumentType type; // which kind of operand this is
u32 value; // payload; presumably its meaning depends on `type`
} Argument;
/// A named piece of data declared in the source program.
/// NOTE(review): units of `element_size` and `size` and ownership of `data` are not
/// visible in this header - confirm.
typedef struct DataDefinition {
    u8 element_size;
    u16 size;
    void* data;
    cstr name;
} DataDefinition; // the typedef name is required so that the bare type name exists in C

// Declares the List_DataDefinition type and its functions (see collections/List.h).
List_declare(DataDefinition);
typedef struct AST {
DataDefinition
} AST;
/*
Rough sketch of the source syntax this AST is presumably meant to represent (work in progress):
d8 name ''
goto label
.label:
code...
*/

View File

@ -1,8 +1,8 @@
#include "compiler.h" #include "Lexer.h"
List_define(Token); List_define(Token);
CodePos Compiler_getLineAndColumn(Compiler* cmp, u32 pos){ CodePos Lexer_getLineAndColumn(Lexer* cmp, u32 pos){
u32 prev_lines_len = 0; u32 prev_lines_len = 0;
if(pos >= cmp->code_len) if(pos >= cmp->code_len)
return CodePos_create(0, 0); return CodePos_create(0, 0);
@ -17,18 +17,18 @@ CodePos Compiler_getLineAndColumn(Compiler* cmp, u32 pos){
return CodePos_create(0, 0); return CodePos_create(0, 0);
} }
static void completeLine(Compiler* cmp){ static void completeLine(Lexer* cmp){
List_u32_push(&cmp->line_lengths, cmp->column); List_u32_push(&cmp->line_lengths, cmp->column);
cmp->column = 0; cmp->column = 0;
} }
void _Compiler_setError(Compiler* cmp, cstr context, cstr format, ...){ void _Lexer_setError(Lexer* cmp, cstr context, cstr format, ...){
completeLine(cmp); completeLine(cmp);
// happens at the end of file // happens at the end of file
if(cmp->pos >= cmp->code_len) if(cmp->pos >= cmp->code_len)
cmp->pos = cmp->code_len - 1; cmp->pos = cmp->code_len - 1;
char position_str[32]; char position_str[32];
CodePos code_pos = Compiler_getLineAndColumn(cmp, cmp->pos); CodePos code_pos = Lexer_getLineAndColumn(cmp, cmp->pos);
sprintf(position_str, "[at %u:%u][", code_pos.line, code_pos.column); sprintf(position_str, "[at %u:%u][", code_pos.line, code_pos.column);
char* real_format = strcat_malloc(position_str, context, "] ", format); char* real_format = strcat_malloc(position_str, context, "] ", format);
va_list argv; va_list argv;
@ -40,12 +40,12 @@ void _Compiler_setError(Compiler* cmp, cstr context, cstr format, ...){
buf = malloc(16); buf = malloc(16);
strcpy(buf, "SPRINTF FAILED"); strcpy(buf, "SPRINTF FAILED");
} }
cmp->state = CompilerState_Error; cmp->state = LexerState_Error;
cmp->error_message = buf; cmp->error_message = buf;
} }
#define setError(FORMAT, ...) Compiler_setError(cmp, FORMAT, ##__VA_ARGS__) #define setError(FORMAT, ...) Lexer_setError(cmp, FORMAT, ##__VA_ARGS__)
#define returnError(FORMAT, ...) {\ #define returnError(FORMAT, ...) {\
setError(FORMAT, ##__VA_ARGS__);\ setError(FORMAT, ##__VA_ARGS__);\
@ -59,20 +59,20 @@ void _Compiler_setError(Compiler* cmp, cstr context, cstr format, ...){
#define Error_unexpectedCharacter(C) "unexpected character '%c'", C #define Error_unexpectedCharacter(C) "unexpected character '%c'", C
#define Error_endOfFile "unexpected end of file" #define Error_endOfFile "unexpected end of file"
void Compiler_init(Compiler* cmp){ void Lexer_init(Lexer* cmp){
memset(cmp, 0, sizeof(Compiler)); memset(cmp, 0, sizeof(Lexer));
cmp->state = CompilerState_Initial; cmp->state = LexerState_Initial;
cmp->tokens = List_Token_alloc(4096); cmp->tokens = List_Token_alloc(4096);
cmp->line_lengths = List_u32_alloc(1024); cmp->line_lengths = List_u32_alloc(1024);
} }
void Compiler_free(Compiler* cmp){ void Lexer_free(Lexer* cmp){
free(cmp->code); free(cmp->code);
free(cmp->tokens.data); free(cmp->tokens.data);
free(cmp->line_lengths.data); free(cmp->line_lengths.data);
} }
static void readCommentSingleLine(Compiler* cmp){ static void readCommentSingleLine(Lexer* cmp){
char c; // '/' char c; // '/'
Token tok = Token_construct(TokenType_SingleLineComment, cmp->pos - 1, 0); Token tok = Token_construct(TokenType_SingleLineComment, cmp->pos - 1, 0);
cmp->column++; cmp->column++;
@ -97,7 +97,7 @@ static void readCommentSingleLine(Compiler* cmp){
List_Token_push(&cmp->tokens, tok); List_Token_push(&cmp->tokens, tok);
} }
static void readCommentMultiLine(Compiler* cmp){ static void readCommentMultiLine(Lexer* cmp){
char c; // '*' char c; // '*'
Token tok = Token_construct(TokenType_MultiLineComment, cmp->pos - 1, 0); Token tok = Token_construct(TokenType_MultiLineComment, cmp->pos - 1, 0);
cmp->column++; cmp->column++;
@ -122,7 +122,7 @@ static void readCommentMultiLine(Compiler* cmp){
setError(Error_endOfFile); setError(Error_endOfFile);
} }
static void readComment(Compiler* cmp){ static void readComment(Lexer* cmp){
char c; // '/' char c; // '/'
if(cmp->pos + 1 == cmp->code_len){ if(cmp->pos + 1 == cmp->code_len){
setError(Error_endOfFile); setError(Error_endOfFile);
@ -144,7 +144,7 @@ static void readComment(Compiler* cmp){
else setError(Error_unexpectedCharacter(c)); else setError(Error_unexpectedCharacter(c));
} }
static void readLabel(Compiler* cmp){ static void readLabel(Lexer* cmp){
char c; // '.' char c; // '.'
cmp->pos++; cmp->pos++;
cmp->column++; cmp->column++;
@ -179,7 +179,7 @@ static void readLabel(Compiler* cmp){
else setError(Error_endOfFile); else setError(Error_endOfFile);
} }
static void readArguments(Compiler* cmp){ static void readArguments(Lexer* cmp){
char c; // space char c; // space
cmp->pos++; cmp->pos++;
cmp->column++; cmp->column++;
@ -215,7 +215,7 @@ static void readArguments(Compiler* cmp){
List_Token_push(&cmp->tokens, tok); List_Token_push(&cmp->tokens, tok);
} }
static void readInstruction(Compiler* cmp){ static void readInstruction(Lexer* cmp){
Token tok = Token_construct(TokenType_Instruction, cmp->pos, 0); Token tok = Token_construct(TokenType_Instruction, cmp->pos, 0);
cmp->pos++; cmp->pos++;
cmp->column++; cmp->column++;
@ -252,7 +252,7 @@ static void readInstruction(Compiler* cmp){
List_Token_push(&cmp->tokens, tok); List_Token_push(&cmp->tokens, tok);
} }
static void readChar(Compiler* cmp){ static void readChar(Lexer* cmp){
Token tok = Token_construct(TokenType_Char, cmp->pos, 0); Token tok = Token_construct(TokenType_Char, cmp->pos, 0);
cmp->pos++; cmp->pos++;
cmp->column++; cmp->column++;
@ -279,7 +279,7 @@ static void readChar(Compiler* cmp){
setError(Error_endOfFile); setError(Error_endOfFile);
} }
static void readString(Compiler* cmp){ static void readString(Lexer* cmp){
Token tok = Token_construct(TokenType_String, cmp->pos, 0); Token tok = Token_construct(TokenType_String, cmp->pos, 0);
cmp->pos++; cmp->pos++;
cmp->column++; cmp->column++;
@ -306,7 +306,7 @@ static void readString(Compiler* cmp){
setError(Error_endOfFile); setError(Error_endOfFile);
} }
static void readNumber(Compiler* cmp){ static void readNumber(Lexer* cmp){
Token tok = Token_construct(TokenType_Number, cmp->pos, 0); Token tok = Token_construct(TokenType_Number, cmp->pos, 0);
cmp->pos++; cmp->pos++;
cmp->column++; cmp->column++;
@ -329,9 +329,9 @@ static void readNumber(Compiler* cmp){
List_Token_push(&cmp->tokens, tok); List_Token_push(&cmp->tokens, tok);
} }
static bool lex(Compiler* cmp){ static bool lex(Lexer* cmp){
returnErrorIf_auto(cmp->state != CompilerState_Initial); returnErrorIf_auto(cmp->state != LexerState_Initial);
cmp->state = CompilerState_Lexing; cmp->state = LexerState_Lexing;
cmp->column = 1; cmp->column = 1;
while(cmp->pos < cmp->code_len){ while(cmp->pos < cmp->code_len){
@ -364,7 +364,7 @@ static bool lex(Compiler* cmp){
break; break;
} }
if(cmp->state == CompilerState_Error) if(cmp->state == LexerState_Error)
return false; return false;
c = cmp->code[cmp->pos]; c = cmp->code[cmp->pos];
@ -378,20 +378,20 @@ static bool lex(Compiler* cmp){
return true; return true;
} }
static bool parse(Compiler* cmp){ static bool parse(Lexer* cmp){
returnErrorIf_auto(cmp->state != CompilerState_Lexing); returnErrorIf_auto(cmp->state != LexerState_Lexing);
cmp->state = CompilerState_Parsing; cmp->state = LexerState_Parsing;
return true; return true;
} }
static bool compile(Compiler* cmp, FILE* f){ static bool compile(Lexer* cmp, FILE* f){
returnErrorIf_auto(cmp->state != CompilerState_Parsing); returnErrorIf_auto(cmp->state != LexerState_Parsing);
cmp->state = CompilerState_Compiling; cmp->state = LexerState_Compiling;
return true; return true;
} }
bool Compiler_compileTasm(Compiler* cmp, cstr source_file_name, cstr out_file_name, bool debug){ bool Lexer_compileTasm(Lexer* cmp, cstr source_file_name, cstr out_file_name, bool debug){
FILE* f = fopen(source_file_name, "rb"); FILE* f = fopen(source_file_name, "rb");
if(f == NULL) if(f == NULL)
returnError("ERROR: can't open file '%s'", source_file_name); returnError("ERROR: can't open file '%s'", source_file_name);
@ -439,7 +439,7 @@ bool Compiler_compileTasm(Compiler* cmp, cstr source_file_name, cstr out_file_na
printf("------------------------------------[tokens]-----------------------------------\n"); printf("------------------------------------[tokens]-----------------------------------\n");
for(u32 i = 0; i < cmp->tokens.len; i++){ for(u32 i = 0; i < cmp->tokens.len; i++){
Token t = cmp->tokens.data[i]; Token t = cmp->tokens.data[i];
CodePos pos = Compiler_getLineAndColumn(cmp, t.begin); CodePos pos = Lexer_getLineAndColumn(cmp, t.begin);
char* tokstr = malloc(4096); char* tokstr = malloc(4096);
strncpy(tokstr, cmp->code + t.begin, t.length); strncpy(tokstr, cmp->code + t.begin, t.length);
tokstr[t.length] = 0; tokstr[t.length] = 0;
@ -463,7 +463,7 @@ bool Compiler_compileTasm(Compiler* cmp, cstr source_file_name, cstr out_file_na
success = compile(cmp, f); success = compile(cmp, f);
fclose(f); fclose(f);
if(success){ if(success){
cmp->state = CompilerState_Success; cmp->state = LexerState_Success;
} }
return success; return success;

View File

@ -2,38 +2,39 @@
#include "../std.h" #include "../std.h"
#include "../collections/List.h" #include "../collections/List.h"
#include "token.h" #include "token.h"
#include "AST.h"
List_declare(Token); List_declare(Token);
typedef enum CompilerState { typedef enum LexerState {
CompilerState_Initial, LexerState_Initial,
CompilerState_Lexing, LexerState_Lexing,
CompilerState_Parsing, LexerState_Parsing,
CompilerState_Compiling, LexerState_Compiling,
CompilerState_Error, LexerState_Error,
CompilerState_Success LexerState_Success
} CompilerState; } LexerState;
typedef struct Compiler { typedef struct Lexer {
char* code; char* code;
u32 code_len; u32 code_len;
u32 column; // > 0 if code parsing started u32 column; // > 0 if code parsing started
u32 pos; u32 pos;
CompilerState state; LexerState state;
NULLABLE(char* error_message); NULLABLE(char* error_message);
List_Token tokens; List_Token tokens;
List_u32 line_lengths; List_u32 line_lengths;
} Compiler; } Lexer;
void Compiler_init(Compiler* cmp); void Lexer_init(Lexer* cmp);
void Compiler_free(Compiler* cmp); void Lexer_free(Lexer* cmp);
/// @brief compile assembly language code to machine code /// @brief compile assembly language code to machine code
/// @return true if no errors, false if any error occured (check cmp->error_message) /// @return true if no errors, false if any error occured (check cmp->error_message)
bool Compiler_compileTasm(Compiler* cmp, cstr source_file_name, cstr out_file_name, bool debug); bool Lexer_compileTasm(Lexer* cmp, cstr source_file_name, cstr out_file_name, bool debug);
#define Compiler_setError(cmp, format, ...) _Compiler_setError(cmp, __func__, format ,##__VA_ARGS__) #define Lexer_setError(cmp, format, ...) _Lexer_setError(cmp, __func__, format ,##__VA_ARGS__)
void _Compiler_setError(Compiler* cmp, cstr context, cstr format, ...) __attribute__((__format__(__printf__, 3, 4))); void _Lexer_setError(Lexer* cmp, cstr context, cstr format, ...) __attribute__((__format__(__printf__, 3, 4)));
typedef struct CodePos { typedef struct CodePos {
u32 line; // 0 on error u32 line; // 0 on error
@ -43,5 +44,5 @@ typedef struct CodePos {
#define CodePos_create(L, C) ((CodePos){ .line = L, .column = C }) #define CodePos_create(L, C) ((CodePos){ .line = L, .column = C })
/// @param pos index in code buffer /// @param pos index in code buffer
CodePos Compiler_getLineAndColumn(Compiler* cmp, u32 pos); CodePos Lexer_getLineAndColumn(Lexer* cmp, u32 pos);

View File

3
src/compiler/parser.c Normal file
View File

@ -0,0 +1,3 @@
#include "compiler.h"
// Instantiates the list functions for DataDefinition, matching List_declare(DataDefinition) in AST.h
// (presumably the out-of-line List_DataDefinition_push - confirm against collections/List.h).
List_define(DataDefinition);

View File

@ -5,15 +5,19 @@ static cstr _TokenType_str[] = {
"SingleLineComment", "SingleLineComment",
"MultiLineComment", "MultiLineComment",
"Label", "Label",
"Instruction", "DataDefinition",
"Argument",
"Number", "Number",
"Char", "Char",
"String", "String",
"Instruction",
"Register",
"DataType",
"DataPointer",
"DataSize"
}; };
cstr TokenType_toString(TokenType t){ cstr TokenType_toString(TokenType t){
if(t >= sizeof(_TokenType_str) / sizeof(cstr)) if(t >= ARRAY_SIZE(_TokenType_str))
return "!!INDEX_ERROR!!"; return "!!INDEX_ERROR!!";
return _TokenType_str[t]; return _TokenType_str[t];
} }

View File

@ -6,11 +6,15 @@ typedef enum TokenType {
TokenType_SingleLineComment, TokenType_SingleLineComment,
TokenType_MultiLineComment, TokenType_MultiLineComment,
TokenType_Label, TokenType_Label,
TokenType_Instruction, TOkenType_DataDefinition,
TokenType_Argument,
TokenType_Number, TokenType_Number,
TokenType_Char, TokenType_Char,
TokenType_String, TokenType_String,
TokenType_Instruction,
TokenType_Register,
TokenType_DataType,
TokenType_DataPointer,
TokenType_DataSize
} TokenType; } TokenType;
cstr TokenType_toString(TokenType t); cstr TokenType_toString(TokenType t);

View File

@ -1,5 +1,19 @@
#include "instructions.h" #include "instructions.h"
// Prototypes of the instruction handlers. Instruction_construct(NAME) stores NAME##_impl
// in the `implementation` field of each table entry, so every handler used in the table
// must be declared before it.
// NOTE(review): the definitions are not visible here. JMP_impl and CALL_impl are declared
// even though their table entries are commented out.
i32 NOP_impl(VM* vm);
i32 PUSH_impl(VM* vm);
i32 MOV_impl(VM* vm);
i32 ADD_impl(VM* vm);
i32 SUB_impl(VM* vm);
i32 MUL_impl(VM* vm);
i32 DIV_impl(VM* vm);
i32 MOD_impl(VM* vm);
i32 SYS_impl(VM* vm);
i32 EXIT_impl(VM* vm);
i32 JMP_impl(VM* vm);
i32 CALL_impl(VM* vm);
const Instruction instructions[] = { const Instruction instructions[] = {
Instruction_construct(NOP), Instruction_construct(NOP),
Instruction_construct(PUSH), Instruction_construct(PUSH),
@ -14,10 +28,9 @@ const Instruction instructions[] = {
// Instruction_construct(JMP), // Instruction_construct(JMP),
// Instruction_construct(CALL), // Instruction_construct(CALL),
}; };
const size_t instructions_count = sizeof(instructions)/sizeof(instructions[0]);
const Instruction* NULLABLE(Instruction_getFromOpcode)(u8 opcode){ const Instruction* Instruction_getFromOpcode(Opcode opcode){
if(opcode >= instructions_count) if(opcode >= ARRAY_SIZE(instructions))
return NULL; return NULL;
return instructions + opcode; return instructions + opcode;

View File

@ -5,30 +5,32 @@
///@returns number of bytes read ///@returns number of bytes read
typedef i32 (*InstructionImplFunc_t)(VM* vm); typedef i32 (*InstructionImplFunc_t)(VM* vm);
/// Numeric code of each instruction.
/// Values are consecutive and start at zero. Instruction_getFromOpcode takes one of these
/// and returns the matching entry of the instruction table.
/// `__packed__` asks the compiler to use the smallest integer type that fits all values.
typedef enum __attribute__((__packed__)) Opcode {
    Opcode_NOP  = 0,
    Opcode_PUSH = 1,
    Opcode_MOV  = 2,
    Opcode_ADD  = 3,
    Opcode_SUB  = 4,
    Opcode_MUL  = 5,
    Opcode_DIV  = 6,
    Opcode_MOD  = 7,
    Opcode_SYS  = 8,
    Opcode_EXIT = 9,
} Opcode;
typedef struct Instruction { typedef struct Instruction {
cstr name; cstr name;
InstructionImplFunc_t implementation; InstructionImplFunc_t implementation;
Opcode opcode;
} Instruction; } Instruction;
#define Instruction_construct(NAME) {\ #define Instruction_construct(NAME) {\
.name = #NAME, \ .name = #NAME, \
.implementation = NAME##_impl \ .implementation = NAME##_impl, \
.opcode = Opcode_##NAME\
} }
/// @brief get instruction info from table /// @brief get instruction info from table
/// @param opcode any byte /// @param opcode any byte
/// @return ptr to struct or NULL /// @return ptr to struct or NULL
const Instruction* NULLABLE(Instruction_getFromOpcode)(u8 opcode); const Instruction* NULLABLE(Instruction_getFromOpcode)(Opcode opcode);
i32 NOP_impl(VM* vm);
i32 PUSH_impl(VM* vm);
i32 MOV_impl(VM* vm);
i32 ADD_impl(VM* vm);
i32 SUB_impl(VM* vm);
i32 MUL_impl(VM* vm);
i32 DIV_impl(VM* vm);
i32 MOD_impl(VM* vm);
i32 SYS_impl(VM* vm);
i32 EXIT_impl(VM* vm);
i32 JMP_impl(VM* vm);
i32 CALL_impl(VM* vm);

View File

@ -26,6 +26,8 @@ typedef u8 bool;
typedef const char* cstr; typedef const char* cstr;
/// Number of elements in array A.
/// Only valid for real arrays; a pointer (including an array function parameter) gives a wrong result.
/// The whole expansion and the argument are parenthesized so the macro can be used inside
/// larger expressions, e.g. `x / ARRAY_SIZE(a)`.
#define ARRAY_SIZE(A) (sizeof(A)/sizeof((A)[0]))
#define __count_args( \ #define __count_args( \
a0, a1, a2, a3, a4, a5, a6, a7 , a8, a9, a10,a11,a12,a13,a14,a15, \ a0, a1, a2, a3, a4, a5, a6, a7 , a8, a9, a10,a11,a12,a13,a14,a15, \
a16,a17,a18,a19,a20,a21,a22,a23, a24,a25,a26,a27,a28,a29,a30,a31, \ a16,a17,a18,a19,a20,a21,a22,a23, a24,a25,a26,a27,a28,a29,a30,a31, \