parser
This commit is contained in:
parent
bd8215fd73
commit
dbe8569a3b
@ -13,7 +13,7 @@
|
||||
}\
|
||||
\
|
||||
static inline List_##T List_##T##_alloc(u32 len){\
|
||||
return List_##T##_construct(len > 0 ? malloc(len * sizeof(T)) : NULL, 0, 0);\
|
||||
return List_##T##_construct((T*)(len > 0 ? malloc(len * sizeof(T)) : NULL), 0, 0);\
|
||||
}\
|
||||
\
|
||||
void List_##T##_push(List_##T* ptr, T value);
|
||||
@ -27,7 +27,7 @@
|
||||
max_len += __List_padding_in_sizeof_T(T);\
|
||||
/* branchless version of max(max_len, __List_min_size) */\
|
||||
max_len += (max_len < __List_min_size) * (__List_min_size - max_len);\
|
||||
ptr->data = realloc(ptr->data, max_len * sizeof(T));\
|
||||
ptr->data = (T*)realloc(ptr->data, max_len * sizeof(T));\
|
||||
ptr->max_len = max_len;\
|
||||
}\
|
||||
ptr->data[ptr->len++] = value;\
|
||||
|
||||
39
src/compiler/AST.h
Normal file
39
src/compiler/AST.h
Normal file
@ -0,0 +1,39 @@
|
||||
#pragma once
|
||||
#include "../std.h"
|
||||
#include "../instructions/instructions.h"
|
||||
#include "../collections/List.h"
|
||||
|
||||
/// Tag stored in Argument.type; enumerates the kinds of argument.
/// The typedef name is required: without it `ArgumentType` is only an enum tag
/// and cannot be used as a plain type name in C (as struct Argument does).
typedef enum ArgumentType {
    ArgumentType_NoArgument,
    ArgumentType_Register,
    ArgumentType_ConstValue,
    ArgumentType_Name,
    ArgumentType_NamedDataPointer,
    ArgumentType_NamedDataSize,
} ArgumentType;
|
||||
|
||||
typedef struct Argument {
|
||||
ArgumentType type;
|
||||
u32 value;
|
||||
} Argument;
|
||||
|
||||
typedef struct DataDefinition {
|
||||
u8 element_size;
|
||||
u16 size;
|
||||
void* data;
|
||||
cstr name;
|
||||
};
|
||||
|
||||
List_declare(DataDefinition);
|
||||
|
||||
typedef struct AST {
|
||||
DataDefinition
|
||||
} AST;
|
||||
|
||||
/*
|
||||
d8 name ''
|
||||
|
||||
goto label
|
||||
.label:
|
||||
code...
|
||||
*/
|
||||
@ -1,8 +1,8 @@
|
||||
#include "compiler.h"
|
||||
#include "Lexer.h"
|
||||
|
||||
List_define(Token);
|
||||
|
||||
CodePos Compiler_getLineAndColumn(Compiler* cmp, u32 pos){
|
||||
CodePos Lexer_getLineAndColumn(Lexer* cmp, u32 pos){
|
||||
u32 prev_lines_len = 0;
|
||||
if(pos >= cmp->code_len)
|
||||
return CodePos_create(0, 0);
|
||||
@ -17,18 +17,18 @@ CodePos Compiler_getLineAndColumn(Compiler* cmp, u32 pos){
|
||||
return CodePos_create(0, 0);
|
||||
}
|
||||
|
||||
static void completeLine(Compiler* cmp){
|
||||
static void completeLine(Lexer* cmp){
|
||||
List_u32_push(&cmp->line_lengths, cmp->column);
|
||||
cmp->column = 0;
|
||||
}
|
||||
|
||||
void _Compiler_setError(Compiler* cmp, cstr context, cstr format, ...){
|
||||
void _Lexer_setError(Lexer* cmp, cstr context, cstr format, ...){
|
||||
completeLine(cmp);
|
||||
// happens at the end of file
|
||||
if(cmp->pos >= cmp->code_len)
|
||||
cmp->pos = cmp->code_len - 1;
|
||||
char position_str[32];
|
||||
CodePos code_pos = Compiler_getLineAndColumn(cmp, cmp->pos);
|
||||
CodePos code_pos = Lexer_getLineAndColumn(cmp, cmp->pos);
|
||||
sprintf(position_str, "[at %u:%u][", code_pos.line, code_pos.column);
|
||||
char* real_format = strcat_malloc(position_str, context, "] ", format);
|
||||
va_list argv;
|
||||
@ -40,12 +40,12 @@ void _Compiler_setError(Compiler* cmp, cstr context, cstr format, ...){
|
||||
buf = malloc(16);
|
||||
strcpy(buf, "SPRINTF FAILED");
|
||||
}
|
||||
cmp->state = CompilerState_Error;
|
||||
cmp->state = LexerState_Error;
|
||||
cmp->error_message = buf;
|
||||
}
|
||||
|
||||
|
||||
#define setError(FORMAT, ...) Compiler_setError(cmp, FORMAT, ##__VA_ARGS__)
|
||||
#define setError(FORMAT, ...) Lexer_setError(cmp, FORMAT, ##__VA_ARGS__)
|
||||
|
||||
#define returnError(FORMAT, ...) {\
|
||||
setError(FORMAT, ##__VA_ARGS__);\
|
||||
@ -59,20 +59,20 @@ void _Compiler_setError(Compiler* cmp, cstr context, cstr format, ...){
|
||||
#define Error_unexpectedCharacter(C) "unexpected character '%c'", C
|
||||
#define Error_endOfFile "unexpected end of file"
|
||||
|
||||
void Compiler_init(Compiler* cmp){
|
||||
memset(cmp, 0, sizeof(Compiler));
|
||||
cmp->state = CompilerState_Initial;
|
||||
void Lexer_init(Lexer* cmp){
|
||||
memset(cmp, 0, sizeof(Lexer));
|
||||
cmp->state = LexerState_Initial;
|
||||
cmp->tokens = List_Token_alloc(4096);
|
||||
cmp->line_lengths = List_u32_alloc(1024);
|
||||
}
|
||||
|
||||
void Compiler_free(Compiler* cmp){
|
||||
void Lexer_free(Lexer* cmp){
|
||||
free(cmp->code);
|
||||
free(cmp->tokens.data);
|
||||
free(cmp->line_lengths.data);
|
||||
}
|
||||
|
||||
static void readCommentSingleLine(Compiler* cmp){
|
||||
static void readCommentSingleLine(Lexer* cmp){
|
||||
char c; // '/'
|
||||
Token tok = Token_construct(TokenType_SingleLineComment, cmp->pos - 1, 0);
|
||||
cmp->column++;
|
||||
@ -97,7 +97,7 @@ static void readCommentSingleLine(Compiler* cmp){
|
||||
List_Token_push(&cmp->tokens, tok);
|
||||
}
|
||||
|
||||
static void readCommentMultiLine(Compiler* cmp){
|
||||
static void readCommentMultiLine(Lexer* cmp){
|
||||
char c; // '*'
|
||||
Token tok = Token_construct(TokenType_MultiLineComment, cmp->pos - 1, 0);
|
||||
cmp->column++;
|
||||
@ -122,7 +122,7 @@ static void readCommentMultiLine(Compiler* cmp){
|
||||
setError(Error_endOfFile);
|
||||
}
|
||||
|
||||
static void readComment(Compiler* cmp){
|
||||
static void readComment(Lexer* cmp){
|
||||
char c; // '/'
|
||||
if(cmp->pos + 1 == cmp->code_len){
|
||||
setError(Error_endOfFile);
|
||||
@ -144,7 +144,7 @@ static void readComment(Compiler* cmp){
|
||||
else setError(Error_unexpectedCharacter(c));
|
||||
}
|
||||
|
||||
static void readLabel(Compiler* cmp){
|
||||
static void readLabel(Lexer* cmp){
|
||||
char c; // '.'
|
||||
cmp->pos++;
|
||||
cmp->column++;
|
||||
@ -179,7 +179,7 @@ static void readLabel(Compiler* cmp){
|
||||
else setError(Error_endOfFile);
|
||||
}
|
||||
|
||||
static void readArguments(Compiler* cmp){
|
||||
static void readArguments(Lexer* cmp){
|
||||
char c; // space
|
||||
cmp->pos++;
|
||||
cmp->column++;
|
||||
@ -215,7 +215,7 @@ static void readArguments(Compiler* cmp){
|
||||
List_Token_push(&cmp->tokens, tok);
|
||||
}
|
||||
|
||||
static void readInstruction(Compiler* cmp){
|
||||
static void readInstruction(Lexer* cmp){
|
||||
Token tok = Token_construct(TokenType_Instruction, cmp->pos, 0);
|
||||
cmp->pos++;
|
||||
cmp->column++;
|
||||
@ -252,7 +252,7 @@ static void readInstruction(Compiler* cmp){
|
||||
List_Token_push(&cmp->tokens, tok);
|
||||
}
|
||||
|
||||
static void readChar(Compiler* cmp){
|
||||
static void readChar(Lexer* cmp){
|
||||
Token tok = Token_construct(TokenType_Char, cmp->pos, 0);
|
||||
cmp->pos++;
|
||||
cmp->column++;
|
||||
@ -279,7 +279,7 @@ static void readChar(Compiler* cmp){
|
||||
setError(Error_endOfFile);
|
||||
}
|
||||
|
||||
static void readString(Compiler* cmp){
|
||||
static void readString(Lexer* cmp){
|
||||
Token tok = Token_construct(TokenType_String, cmp->pos, 0);
|
||||
cmp->pos++;
|
||||
cmp->column++;
|
||||
@ -306,7 +306,7 @@ static void readString(Compiler* cmp){
|
||||
setError(Error_endOfFile);
|
||||
}
|
||||
|
||||
static void readNumber(Compiler* cmp){
|
||||
static void readNumber(Lexer* cmp){
|
||||
Token tok = Token_construct(TokenType_Number, cmp->pos, 0);
|
||||
cmp->pos++;
|
||||
cmp->column++;
|
||||
@ -329,9 +329,9 @@ static void readNumber(Compiler* cmp){
|
||||
List_Token_push(&cmp->tokens, tok);
|
||||
}
|
||||
|
||||
static bool lex(Compiler* cmp){
|
||||
returnErrorIf_auto(cmp->state != CompilerState_Initial);
|
||||
cmp->state = CompilerState_Lexing;
|
||||
static bool lex(Lexer* cmp){
|
||||
returnErrorIf_auto(cmp->state != LexerState_Initial);
|
||||
cmp->state = LexerState_Lexing;
|
||||
cmp->column = 1;
|
||||
|
||||
while(cmp->pos < cmp->code_len){
|
||||
@ -364,7 +364,7 @@ static bool lex(Compiler* cmp){
|
||||
break;
|
||||
}
|
||||
|
||||
if(cmp->state == CompilerState_Error)
|
||||
if(cmp->state == LexerState_Error)
|
||||
return false;
|
||||
|
||||
c = cmp->code[cmp->pos];
|
||||
@ -378,20 +378,20 @@ static bool lex(Compiler* cmp){
|
||||
return true;
|
||||
}
|
||||
|
||||
static bool parse(Compiler* cmp){
|
||||
returnErrorIf_auto(cmp->state != CompilerState_Lexing);
|
||||
cmp->state = CompilerState_Parsing;
|
||||
static bool parse(Lexer* cmp){
|
||||
returnErrorIf_auto(cmp->state != LexerState_Lexing);
|
||||
cmp->state = LexerState_Parsing;
|
||||
|
||||
return true;
|
||||
}
|
||||
static bool compile(Compiler* cmp, FILE* f){
|
||||
returnErrorIf_auto(cmp->state != CompilerState_Parsing);
|
||||
cmp->state = CompilerState_Compiling;
|
||||
static bool compile(Lexer* cmp, FILE* f){
|
||||
returnErrorIf_auto(cmp->state != LexerState_Parsing);
|
||||
cmp->state = LexerState_Compiling;
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
bool Compiler_compileTasm(Compiler* cmp, cstr source_file_name, cstr out_file_name, bool debug){
|
||||
bool Lexer_compileTasm(Lexer* cmp, cstr source_file_name, cstr out_file_name, bool debug){
|
||||
FILE* f = fopen(source_file_name, "rb");
|
||||
if(f == NULL)
|
||||
returnError("ERROR: can't open file '%s'", source_file_name);
|
||||
@ -439,7 +439,7 @@ bool Compiler_compileTasm(Compiler* cmp, cstr source_file_name, cstr out_file_na
|
||||
printf("------------------------------------[tokens]-----------------------------------\n");
|
||||
for(u32 i = 0; i < cmp->tokens.len; i++){
|
||||
Token t = cmp->tokens.data[i];
|
||||
CodePos pos = Compiler_getLineAndColumn(cmp, t.begin);
|
||||
CodePos pos = Lexer_getLineAndColumn(cmp, t.begin);
|
||||
char* tokstr = malloc(4096);
|
||||
strncpy(tokstr, cmp->code + t.begin, t.length);
|
||||
tokstr[t.length] = 0;
|
||||
@ -463,7 +463,7 @@ bool Compiler_compileTasm(Compiler* cmp, cstr source_file_name, cstr out_file_na
|
||||
success = compile(cmp, f);
|
||||
fclose(f);
|
||||
if(success){
|
||||
cmp->state = CompilerState_Success;
|
||||
cmp->state = LexerState_Success;
|
||||
}
|
||||
|
||||
return success;
|
||||
|
||||
@ -2,38 +2,39 @@
|
||||
#include "../std.h"
|
||||
#include "../collections/List.h"
|
||||
#include "token.h"
|
||||
#include "AST.h"
|
||||
|
||||
List_declare(Token);
|
||||
|
||||
typedef enum CompilerState {
|
||||
CompilerState_Initial,
|
||||
CompilerState_Lexing,
|
||||
CompilerState_Parsing,
|
||||
CompilerState_Compiling,
|
||||
CompilerState_Error,
|
||||
CompilerState_Success
|
||||
} CompilerState;
|
||||
/// Stage of the processing pipeline a Lexer is currently in.
/// The values are the default sequential ones, written out explicitly.
typedef enum LexerState {
    LexerState_Initial   = 0,
    LexerState_Lexing    = 1,
    LexerState_Parsing   = 2,
    LexerState_Compiling = 3,
    LexerState_Error     = 4,
    LexerState_Success   = 5
} LexerState;
|
||||
|
||||
typedef struct Compiler {
|
||||
typedef struct Lexer {
|
||||
char* code;
|
||||
u32 code_len;
|
||||
u32 column; // > 0 if code parsing started
|
||||
u32 pos;
|
||||
CompilerState state;
|
||||
LexerState state;
|
||||
NULLABLE(char* error_message);
|
||||
List_Token tokens;
|
||||
List_u32 line_lengths;
|
||||
} Compiler;
|
||||
} Lexer;
|
||||
|
||||
void Compiler_init(Compiler* cmp);
|
||||
void Compiler_free(Compiler* cmp);
|
||||
void Lexer_init(Lexer* cmp);
|
||||
void Lexer_free(Lexer* cmp);
|
||||
|
||||
/// @brief compile assembly language code to machine code
|
||||
/// @return true if no errors, false if any error occurred (check cmp->error_message)
|
||||
bool Compiler_compileTasm(Compiler* cmp, cstr source_file_name, cstr out_file_name, bool debug);
|
||||
bool Lexer_compileTasm(Lexer* cmp, cstr source_file_name, cstr out_file_name, bool debug);
|
||||
|
||||
#define Compiler_setError(cmp, format, ...) _Compiler_setError(cmp, __func__, format ,##__VA_ARGS__)
|
||||
void _Compiler_setError(Compiler* cmp, cstr context, cstr format, ...) __attribute__((__format__(__printf__, 3, 4)));
|
||||
#define Lexer_setError(cmp, format, ...) _Lexer_setError(cmp, __func__, format ,##__VA_ARGS__)
|
||||
void _Lexer_setError(Lexer* cmp, cstr context, cstr format, ...) __attribute__((__format__(__printf__, 3, 4)));
|
||||
|
||||
typedef struct CodePos {
|
||||
u32 line; // 0 on error
|
||||
@ -43,5 +44,5 @@ typedef struct CodePos {
|
||||
#define CodePos_create(L, C) ((CodePos){ .line = L, .column = C })
|
||||
|
||||
/// @param pos index in code buffer
|
||||
CodePos Compiler_getLineAndColumn(Compiler* cmp, u32 pos);
|
||||
CodePos Lexer_getLineAndColumn(Lexer* cmp, u32 pos);
|
||||
|
||||
|
||||
0
src/compiler/compiler_internal.h
Normal file
0
src/compiler/compiler_internal.h
Normal file
3
src/compiler/parser.c
Normal file
3
src/compiler/parser.c
Normal file
@ -0,0 +1,3 @@
|
||||
#include "compiler.h"
|
||||
|
||||
List_define(DataDefinition);
|
||||
@ -5,15 +5,19 @@ static cstr _TokenType_str[] = {
|
||||
"SingleLineComment",
|
||||
"MultiLineComment",
|
||||
"Label",
|
||||
"Instruction",
|
||||
"Argument",
|
||||
"DataDefinition",
|
||||
"Number",
|
||||
"Char",
|
||||
"String",
|
||||
"Instruction",
|
||||
"Register",
|
||||
"DataType",
|
||||
"DataPointer",
|
||||
"DataSize"
|
||||
};
|
||||
|
||||
/// Returns the printable name of a token type.
/// @param t token type, used as an index into _TokenType_str
/// @return the matching table entry, or "!!INDEX_ERROR!!" when t is outside the table
cstr TokenType_toString(TokenType t){
    return (t < ARRAY_SIZE(_TokenType_str)) ? _TokenType_str[t] : "!!INDEX_ERROR!!";
}
|
||||
|
||||
@ -6,11 +6,15 @@ typedef enum TokenType {
|
||||
TokenType_SingleLineComment,
|
||||
TokenType_MultiLineComment,
|
||||
TokenType_Label,
|
||||
TokenType_Instruction,
|
||||
TokenType_Argument,
|
||||
TOkenType_DataDefinition,
|
||||
TokenType_Number,
|
||||
TokenType_Char,
|
||||
TokenType_String,
|
||||
TokenType_Instruction,
|
||||
TokenType_Register,
|
||||
TokenType_DataType,
|
||||
TokenType_DataPointer,
|
||||
TokenType_DataSize
|
||||
} TokenType;
|
||||
|
||||
cstr TokenType_toString(TokenType t);
|
||||
|
||||
@ -1,5 +1,19 @@
|
||||
#include "instructions.h"
|
||||
|
||||
i32 NOP_impl(VM* vm);
|
||||
i32 PUSH_impl(VM* vm);
|
||||
i32 MOV_impl(VM* vm);
|
||||
i32 ADD_impl(VM* vm);
|
||||
i32 SUB_impl(VM* vm);
|
||||
i32 MUL_impl(VM* vm);
|
||||
i32 DIV_impl(VM* vm);
|
||||
i32 MOD_impl(VM* vm);
|
||||
i32 SYS_impl(VM* vm);
|
||||
i32 EXIT_impl(VM* vm);
|
||||
i32 JMP_impl(VM* vm);
|
||||
i32 CALL_impl(VM* vm);
|
||||
|
||||
|
||||
const Instruction instructions[] = {
|
||||
Instruction_construct(NOP),
|
||||
Instruction_construct(PUSH),
|
||||
@ -14,10 +28,9 @@ const Instruction instructions[] = {
|
||||
// Instruction_construct(JMP),
|
||||
// Instruction_construct(CALL),
|
||||
};
|
||||
const size_t instructions_count = sizeof(instructions)/sizeof(instructions[0]);
|
||||
|
||||
const Instruction* NULLABLE(Instruction_getFromOpcode)(u8 opcode){
|
||||
if(opcode >= instructions_count)
|
||||
const Instruction* Instruction_getFromOpcode(Opcode opcode){
|
||||
if(opcode >= ARRAY_SIZE(instructions))
|
||||
return NULL;
|
||||
|
||||
return instructions + opcode;
|
||||
|
||||
@ -5,30 +5,32 @@
|
||||
///@returns number of bytes read
|
||||
typedef i32 (*InstructionImplFunc_t)(VM* vm);
|
||||
|
||||
/// Instruction opcodes. Each value is also the instruction's index in the
/// `instructions` table (Instruction_getFromOpcode returns `instructions + opcode`).
/// The values are the default sequential ones, written out explicitly.
typedef enum __attribute__((__packed__)) Opcode {
    Opcode_NOP  = 0,
    Opcode_PUSH = 1,
    Opcode_MOV  = 2,
    Opcode_ADD  = 3,
    Opcode_SUB  = 4,
    Opcode_MUL  = 5,
    Opcode_DIV  = 6,
    Opcode_MOD  = 7,
    Opcode_SYS  = 8,
    Opcode_EXIT = 9,
} Opcode;
|
||||
|
||||
typedef struct Instruction {
|
||||
cstr name;
|
||||
InstructionImplFunc_t implementation;
|
||||
Opcode opcode;
|
||||
} Instruction;
|
||||
|
||||
/// Builds the initializer for one Instruction from a bare name:
/// NAME becomes the string name, NAME_impl the handler and Opcode_NAME the opcode.
#define Instruction_construct(NAME) {\
    .opcode = Opcode_##NAME, \
    .name = #NAME, \
    .implementation = NAME##_impl\
}
|
||||
|
||||
/// @brief get instruction info from table
|
||||
/// @param opcode any byte
|
||||
/// @return ptr to struct or NULL
|
||||
const Instruction* NULLABLE(Instruction_getFromOpcode)(u8 opcode);
|
||||
|
||||
i32 NOP_impl(VM* vm);
|
||||
i32 PUSH_impl(VM* vm);
|
||||
i32 MOV_impl(VM* vm);
|
||||
i32 ADD_impl(VM* vm);
|
||||
i32 SUB_impl(VM* vm);
|
||||
i32 MUL_impl(VM* vm);
|
||||
i32 DIV_impl(VM* vm);
|
||||
i32 MOD_impl(VM* vm);
|
||||
i32 SYS_impl(VM* vm);
|
||||
i32 EXIT_impl(VM* vm);
|
||||
i32 JMP_impl(VM* vm);
|
||||
i32 CALL_impl(VM* vm);
|
||||
const Instruction* NULLABLE(Instruction_getFromOpcode)(Opcode opcode);
|
||||
|
||||
Loading…
Reference in New Issue
Block a user