lexer
This commit is contained in:
parent
1cae800a66
commit
1729070b80
2
.vscode/launch.json
vendored
2
.vscode/launch.json
vendored
@ -7,7 +7,7 @@
|
||||
"request": "launch",
|
||||
"program": "${workspaceFolder}/bin/tcpu",
|
||||
"windows": { "program": "${workspaceFolder}/bin/tcpu.exe" },
|
||||
"args": [ "--image", "image.bin" ],
|
||||
"args": [ "-c", "s.tasm", "o.bin" ],
|
||||
"cwd": "${workspaceFolder}/bin",
|
||||
"preLaunchTask": "build_exec_dbg",
|
||||
"stopAtEntry": false,
|
||||
|
||||
@ -6,7 +6,7 @@ void VM_init(VM* vm){
|
||||
vm->state = VMState_Initialized;
|
||||
}
|
||||
|
||||
void _VM_setError(VM* vm, const char* context, const char* format, ...){
|
||||
void _VM_setError(VM* vm, cstr context, cstr format, ...){
|
||||
va_list argv;
|
||||
char position_str[32];
|
||||
sprintf(position_str, "[at 0x%x][", (u32)vm->current_pos);
|
||||
@ -23,7 +23,7 @@ void _VM_setError(VM* vm, const char* context, const char* format, ...){
|
||||
vm->state = VMState_InternalError;
|
||||
}
|
||||
|
||||
bool VM_loadProgram(VM* vm, u8* data, size_t size){
|
||||
bool VM_setMemory(VM* vm, u8* data, size_t size){
|
||||
if(data == NULL){
|
||||
VM_setError(vm, "data == NULL");
|
||||
return false;
|
||||
@ -38,7 +38,7 @@ bool VM_loadProgram(VM* vm, u8* data, size_t size){
|
||||
return true;
|
||||
}
|
||||
|
||||
i32 VM_executeProgram(VM* vm){
|
||||
i32 VM_boot(VM* vm){
|
||||
if(vm->data == NULL){
|
||||
VM_setError(vm, "data == null");
|
||||
return -1;
|
||||
|
||||
@ -56,15 +56,15 @@ typedef struct VM {
|
||||
void VM_init(VM* vm);
|
||||
|
||||
/// @brief Loads a program from the buffer.
|
||||
/// @param data buffer with full program code
|
||||
/// @param data buffer starting with machine code
|
||||
/// @param size size of the program in bytes
|
||||
bool VM_loadProgram(VM* vm, u8* data, size_t size);
|
||||
bool VM_setMemory(VM* vm, u8* data, size_t size);
|
||||
|
||||
/// @brief Executes the program loaded into VM.
|
||||
/// @return program exit code or -1 on error (check vm.error_message)
|
||||
i32 VM_executeProgram(VM* vm);
|
||||
i32 VM_boot(VM* vm);
|
||||
|
||||
bool VM_dataRead(VM* vm, void* dst, size_t pos, size_t size);
|
||||
|
||||
#define VM_setError(vm, format, ...) _VM_setError(vm, __func__, format ,##__VA_ARGS__)
|
||||
void _VM_setError(VM* vm, const char* context, const char* format, ...) __attribute__((__format__(__printf__, 3, 4)));
|
||||
void _VM_setError(VM* vm, cstr context, cstr format, ...) __attribute__((__format__(__printf__, 3, 4)));
|
||||
|
||||
5
src/collections/List.c
Normal file
5
src/collections/List.c
Normal file
@ -0,0 +1,5 @@
|
||||
#include "List.h"
|
||||
|
||||
List_define(cstr);
|
||||
List_define(u32);
|
||||
List_define(u8);
|
||||
@ -39,3 +39,7 @@
|
||||
// sizeof(T) == 2 - padding is 3 of sizeof(T)
|
||||
// sizeof(T) == 4 - padding is 1 of sizeof(T)
|
||||
#define __List_padding_in_sizeof_T(T) ((8 - sizeof(T) % 8) / sizeof(T) )
|
||||
|
||||
List_declare(cstr);
|
||||
List_declare(u32);
|
||||
List_declare(u8);
|
||||
|
||||
@ -2,25 +2,50 @@
|
||||
|
||||
List_define(Token);
|
||||
|
||||
void _Compiler_setError(Compiler* cmp, const char* context, const char* format, ...){
|
||||
va_list argv;
|
||||
/// @brief Translates an index in the code buffer into a 1-based line and column.
/// @param pos index in code buffer
/// @return position of `pos`; { 0, 0 } when `pos` is not inside the code buffer
///         or lies beyond the lines recorded in cmp->line_lengths
CodePos Compiler_getLineAndColumn(Compiler* cmp, u32 pos){
    if(cmp->code_len <= pos)
        return CodePos_create(0, 0);

    // index of the first character of the line currently being examined
    u32 line_start = 0;
    u32 line_index = 0;
    while(line_index < cmp->line_lengths.len){
        u32 line_end = line_start + cmp->line_lengths.data[line_index];
        // `pos` belongs to this line when it is before the start of the next one
        if(pos < line_end)
            return CodePos_create(line_index + 1, pos + 1 - line_start);
        line_start = line_end;
        line_index++;
    }

    return CodePos_create(0, 0);
}
|
||||
|
||||
/// Stores the current column counter as the length of the line that has just
/// ended and restarts the counter from zero.
static void completeLine(Compiler* cmp){
    u32 finished_line_len = cmp->column;
    cmp->column = 0;
    List_u32_push(&cmp->line_lengths, finished_line_len);
}
|
||||
|
||||
void _Compiler_setError(Compiler* cmp, cstr context, cstr format, ...){
|
||||
completeLine(cmp);
|
||||
// happens at the end of file
|
||||
if(cmp->pos >= cmp->code_len)
|
||||
cmp->pos = cmp->code_len - 1;
|
||||
char position_str[32];
|
||||
sprintf(position_str, "[at %u:%u][", cmp->line, cmp->column);
|
||||
CodePos code_pos = Compiler_getLineAndColumn(cmp, cmp->pos);
|
||||
sprintf(position_str, "[at %u:%u][", code_pos.line, code_pos.column);
|
||||
char* real_format = strcat_malloc(position_str, context, "] ", format);
|
||||
va_list argv;
|
||||
va_start(argv, format);
|
||||
char* NULLABLE(buf) = vsprintf_malloc(512, real_format, argv);
|
||||
va_end(argv);
|
||||
free(real_format);
|
||||
if(cmp->error_message == NULL){
|
||||
cmp->error_message = malloc(16);
|
||||
strcpy(cmp->error_message, "SPRINTF FAILED");
|
||||
if(buf == NULL){
|
||||
buf = malloc(16);
|
||||
strcpy(buf, "SPRINTF FAILED");
|
||||
}
|
||||
cmp->state = CompilerState_Error;
|
||||
cmp->error_message = buf;
|
||||
}
|
||||
|
||||
|
||||
#define setError(FORMAT, ...) Compiler_setError(cmp, FORMAT, ##__VA_ARGS__);
|
||||
#define setError(FORMAT, ...) Compiler_setError(cmp, FORMAT, ##__VA_ARGS__)
|
||||
|
||||
#define returnError(FORMAT, ...) {\
|
||||
setError(FORMAT, ##__VA_ARGS__);\
|
||||
@ -31,139 +56,331 @@ void _Compiler_setError(Compiler* cmp, const char* context, const char* format,
|
||||
|
||||
#define returnErrorIf_auto(STATEMENT) returnErrorIf(STATEMENT, #STATEMENT)
|
||||
|
||||
#define Error_UnexpectedCharacter(C) "unexpected character '%c'", C
|
||||
|
||||
#define Error_endOfFile() "unexpected end of file"
|
||||
#define Error_unexpectedCharacter(C) "unexpected character '%c'", C
|
||||
#define Error_endOfFile "unexpected end of file"
|
||||
|
||||
void Compiler_init(Compiler* cmp){
|
||||
memset(cmp, 0, sizeof(Compiler));
|
||||
cmp->state = CompilerState_Initial;
|
||||
cmp->tokens = List_Token_alloc(1024 * 8);
|
||||
cmp->tokens = List_Token_alloc(4096);
|
||||
cmp->line_lengths = List_u32_alloc(1024);
|
||||
}
|
||||
|
||||
void Compiler_free(Compiler* cmp){
|
||||
if(cmp->tokens.data)
|
||||
free(cmp->tokens.data);
|
||||
free(cmp->code);
|
||||
free(cmp->tokens.data);
|
||||
free(cmp->line_lengths.data);
|
||||
}
|
||||
|
||||
static void _recognizeLexema(Compiler* cmp, size_t pos, char c){
|
||||
switch(c){
|
||||
// skip blank characters
|
||||
case ' ': case '\t': case '\r': case '\n':
|
||||
break;
|
||||
// try read comment
|
||||
case '/':
|
||||
cmp->state = CompilerState_ReadingComment;
|
||||
break;
|
||||
// try read label
|
||||
case '.':
|
||||
cmp->state = CompilerState_ReadingLabel;
|
||||
break;
|
||||
default:
|
||||
// try read instruction
|
||||
if(isAlphabeticalL(c) || isAlphabeticalR(c)){
|
||||
cmp->state = CompilerState_ReadingInstruction;
|
||||
break;
|
||||
}
|
||||
else setError(Error_UnexpectedCharacter(c));
|
||||
}
|
||||
}
|
||||
|
||||
static void _readCommentChar(Compiler* cmp, size_t pos, char c, Token* tok){
|
||||
// begin comment
|
||||
if(tok->type == TokenType_Unset){
|
||||
if(c == '*')
|
||||
tok->type = TokenType_MultiLineComment;
|
||||
else if(c == '/')
|
||||
tok->type = TokenType_SingleLineComment;
|
||||
else {
|
||||
setError(Error_UnexpectedCharacter(c));
|
||||
static void readCommentSingleLine(Compiler* cmp){
|
||||
char c; // '/'
|
||||
Token tok = Token_construct(TokenType_SingleLineComment, cmp->pos - 1, 0);
|
||||
cmp->column++;
|
||||
cmp->pos++;
|
||||
|
||||
while(cmp->pos < cmp->code_len){
|
||||
c = cmp->code[cmp->pos];
|
||||
// end of line
|
||||
if(c == '\r' || c == '\n'){
|
||||
tok.length = cmp->pos - tok.begin;
|
||||
List_Token_push(&cmp->tokens, tok);
|
||||
// cmp->line will be increased in lex()
|
||||
return;
|
||||
}
|
||||
tok->begin = pos;
|
||||
tok->length = 0;
|
||||
|
||||
cmp->column++;
|
||||
cmp->pos++;
|
||||
}
|
||||
// end multi-line comment
|
||||
else if(c == '/' && tok->type == TokenType_SingleLineComment) {
|
||||
if(cmp->code[pos - 1] == '*' && pos - 1 > tok->begin){
|
||||
tok->length = pos - tok->begin - 2;
|
||||
}
|
||||
}
|
||||
// end single-line comment
|
||||
else if(c == '\n' && tok->type == TokenType_SingleLineComment) {
|
||||
tok->length = pos - tok->begin - 1;
|
||||
// remove trailing '\r'
|
||||
if(cmp->code[pos - 1] == '\r')
|
||||
tok->length--;
|
||||
List_Token_push(&cmp->tokens, *tok);
|
||||
}
|
||||
// at the end of file
|
||||
else if(pos == cmp->code_len - 1){
|
||||
// end single-line comment
|
||||
if(tok->type == TokenType_SingleLineComment){
|
||||
tok->length = pos - tok->begin;
|
||||
}
|
||||
// error on unclosed multiline comment
|
||||
else setError(Error_endOfFile());
|
||||
}
|
||||
// do nothing on comment content
|
||||
|
||||
// end of file
|
||||
tok.length = cmp->pos - tok.begin;
|
||||
List_Token_push(&cmp->tokens, tok);
|
||||
}
|
||||
|
||||
/// @brief Reads a multi-line comment and pushes it to cmp->tokens.
/// The token starts one character before cmp->pos at entry and, when the
/// comment is closed, spans both delimiters. On success cmp->pos is left on the
/// closing '/'. Sets an error if the end of the code is reached first.
static void readCommentMultiLine(Compiler* cmp){
    char c; // '*'
    Token tok = Token_construct(TokenType_MultiLineComment, cmp->pos - 1, 0);
    cmp->column++;
    cmp->pos++;

    while(cmp->pos < cmp->code_len){
        c = cmp->code[cmp->pos];
        // closing comment
        // The earliest valid closing '/' is at tok.begin + 3 (the empty comment "/**/").
        // Requiring at least that offset still prevents the '*' of the opening
        // delimiter from being reused as the '*' of the closing one ("/*/").
        if(cmp->pos >= tok.begin + 3 && c == '/' && cmp->code[cmp->pos - 1] == '*') {
            tok.length = cmp->pos - tok.begin + 1;
            List_Token_push(&cmp->tokens, tok);
            return;
        }

        // line lengths are tracked here because the comment may span several lines
        if(c == '\n')
            completeLine(cmp);
        cmp->column++;
        cmp->pos++;
    }

    // end of file
    setError(Error_endOfFile);
}
|
||||
|
||||
bool _Compiler_lex(Compiler* cmp){
|
||||
cmp->line = 1;
|
||||
/// @brief Decides which kind of comment begins at cmp->pos and reads it.
/// Looks at the character following cmp->pos: '/' starts a single-line comment,
/// '*' starts a multi-line comment, anything else is reported as an error.
static void readComment(Compiler* cmp){
    char c; // '/'
    if(cmp->pos + 1 == cmp->code_len){
        setError(Error_endOfFile);
        return;
    }

    c = cmp->code[cmp->pos + 1];
    if(c == '\r' || c == '\n'){
        // Report the character at cmp->pos itself. cmp->pos has not been advanced
        // yet, so stepping back would name the character before it and would
        // wrap around (out-of-bounds read) when cmp->pos is 0.
        setError(Error_unexpectedCharacter(cmp->code[cmp->pos]));
        return;
    }

    cmp->pos++;
    cmp->column++;
    if(c == '/')
        readCommentSingleLine(cmp);
    else if(c == '*')
        readCommentMultiLine(cmp);
    else setError(Error_unexpectedCharacter(c));
}
|
||||
|
||||
/// @brief Reads a label name and pushes it to cmp->tokens.
/// The name starts right after the character at cmp->pos and consists of
/// letters and digits. It ends at ':', '\r', '\n' or at the end of the code;
/// the terminating character is not consumed. An empty name is an error.
static void readLabel(Compiler* cmp){
    // step over the leading character ('.')
    cmp->column++;
    cmp->pos++;
    Token label = Token_construct(TokenType_Label, cmp->pos, 0);

    for(; cmp->pos < cmp->code_len; cmp->pos++, cmp->column++){
        char ch = cmp->code[cmp->pos];

        bool is_terminator = (ch == ':' || ch == '\r' || ch == '\n');
        if(is_terminator){
            label.length = cmp->pos - label.begin;
            if(label.length == 0){
                // empty name: step back and report the character before the terminator
                cmp->pos--;
                setError(Error_unexpectedCharacter(cmp->code[cmp->pos]));
            }
            else List_Token_push(&cmp->tokens, label);
            return;
        }

        bool is_name_char = isAlphabeticalLower(ch) || isAlphabeticalUpper(ch) || isDigit(ch);
        if(!is_name_char){
            setError(Error_unexpectedCharacter(ch));
            return;
        }
    }

    // end of file
    label.length = cmp->pos - label.begin;
    if(label.length == 0)
        setError(Error_endOfFile);
    else List_Token_push(&cmp->tokens, label);
}
|
||||
|
||||
/// @brief Reads the arguments that follow an instruction, up to the end of line.
/// Arguments are separated by ' ' or '\t'; every non-empty run of other
/// characters is pushed to cmp->tokens as one Argument token. The end-of-line
/// character is not consumed.
static void readArguments(Compiler* cmp){
    // step over the separator the caller stopped at
    cmp->column++;
    cmp->pos++;
    Token arg = Token_construct(TokenType_Argument, cmp->pos, 0);

    for(; cmp->pos < cmp->code_len; cmp->pos++, cmp->column++){
        char ch = cmp->code[cmp->pos];
        bool end_of_line = (ch == '\r' || ch == '\n');
        bool separator = (ch == ' ' || ch == '\t');
        if(!end_of_line && !separator)
            continue;

        // the current argument (if any) ends here
        arg.length = cmp->pos - arg.begin;
        if(arg.length != 0)
            List_Token_push(&cmp->tokens, arg);
        if(end_of_line)
            return;
        // the next argument may begin right after the separator
        arg.begin = cmp->pos + 1;
    }

    // end of file
    arg.length = cmp->pos - arg.begin;
    if(arg.length != 0)
        List_Token_push(&cmp->tokens, arg);
}
|
||||
|
||||
/// @brief Reads an instruction name starting at cmp->pos and pushes it to cmp->tokens.
/// The name is made of letters and digits. A ' ' or '\t' after the name hands
/// control to readArguments(); an end-of-line character or the end of the code
/// finishes the token. Any other character is reported as an error.
static void readInstruction(Compiler* cmp){
    Token instr = Token_construct(TokenType_Instruction, cmp->pos, 0);
    // the first character has already been checked by the caller
    cmp->column++;
    cmp->pos++;

    for(; cmp->pos < cmp->code_len; cmp->pos++, cmp->column++){
        char ch = cmp->code[cmp->pos];

        switch(ch){
            // end of line
            case '\r': case '\n':
                instr.length = cmp->pos - instr.begin;
                List_Token_push(&cmp->tokens, instr);
                return;
            // arguments begin
            case ' ': case '\t':
                instr.length = cmp->pos - instr.begin;
                List_Token_push(&cmp->tokens, instr);
                readArguments(cmp);
                return;
            default:
                break;
        }

        if(!(isAlphabeticalLower(ch) || isAlphabeticalUpper(ch) || isDigit(ch))){
            setError(Error_unexpectedCharacter(ch));
            return;
        }
    }

    // end of file
    instr.length = cmp->pos - instr.begin;
    List_Token_push(&cmp->tokens, instr);
}
|
||||
|
||||
static bool lex(Compiler* cmp){
|
||||
returnErrorIf_auto(cmp->state != CompilerState_Initial);
|
||||
cmp->state = CompilerState_Lexing;
|
||||
cmp->column = 1;
|
||||
Token tok = Token_construct(TokenType_Unset, 0, 0);
|
||||
for(size_t pos = 0; pos < cmp->code_len; pos++){
|
||||
char c = cmp->code[pos];
|
||||
|
||||
switch(cmp->state){
|
||||
case CompilerState_Error:
|
||||
return false;
|
||||
case CompilerState_LexemaRecognition:
|
||||
_recognizeLexema(cmp, pos, c);
|
||||
while(cmp->pos < cmp->code_len){
|
||||
char c = cmp->code[cmp->pos];
|
||||
switch(c){
|
||||
// skip blank characters
|
||||
case ' ': case '\t': case '\r': case '\n':
|
||||
break;
|
||||
case CompilerState_ReadingComment:
|
||||
_readCommentChar(cmp, pos, c, &tok);
|
||||
// try read comment
|
||||
case '/':
|
||||
readComment(cmp);
|
||||
break;
|
||||
// try read label
|
||||
case '.':
|
||||
readLabel(cmp);
|
||||
break;
|
||||
default:
|
||||
returnError("Unexpected compiler state %i", cmp->state);
|
||||
// try read instruction
|
||||
if(isAlphabeticalLower(c) || isAlphabeticalUpper(c)){
|
||||
readInstruction(cmp);
|
||||
break;
|
||||
}
|
||||
else returnError(Error_unexpectedCharacter(c));
|
||||
}
|
||||
|
||||
if(c == '\n'){
|
||||
cmp->column = 0;
|
||||
cmp->line++;
|
||||
}
|
||||
if(cmp->state == CompilerState_Error)
|
||||
return false;
|
||||
|
||||
c = cmp->code[cmp->pos];
|
||||
if(c == '\n')
|
||||
completeLine(cmp);
|
||||
cmp->column++;
|
||||
cmp->pos++;
|
||||
}
|
||||
|
||||
return cmp->state != CompilerState_Error;
|
||||
}
|
||||
|
||||
bool _Compiler_parse(Compiler* cmp){
|
||||
return true;
|
||||
}
|
||||
bool _Compiler_compile(Compiler* cmp){
|
||||
completeLine(cmp);
|
||||
return true;
|
||||
}
|
||||
|
||||
bool Compiler_compileTasm(Compiler* cmp, const char* restrict code, size_t code_len, char* restrict out_buffer, size_t out_len){
|
||||
returnErrorIf_auto(code == NULL);
|
||||
returnErrorIf_auto(code_len == 0);
|
||||
returnErrorIf_auto(out_buffer == NULL);
|
||||
returnErrorIf_auto(out_len == 0);
|
||||
cmp->code = code;
|
||||
cmp->code_len = code_len;
|
||||
cmp->out_buffer = out_buffer;
|
||||
cmp->out_len = out_len;
|
||||
static bool parse(Compiler* cmp){
|
||||
returnErrorIf_auto(cmp->state != CompilerState_Lexing);
|
||||
cmp->state = CompilerState_Parsing;
|
||||
|
||||
if(!_Compiler_lex(cmp))
|
||||
return false;
|
||||
if(!_Compiler_parse(cmp))
|
||||
return false;
|
||||
if(!_Compiler_compile(cmp))
|
||||
return false;
|
||||
return true;
|
||||
}
|
||||
static bool compile(Compiler* cmp, FILE* f){
|
||||
returnErrorIf_auto(cmp->state != CompilerState_Parsing);
|
||||
cmp->state = CompilerState_Compiling;
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
/// @brief compile assembly language code to machine code
/// @param source_file_name path of the assembly source file (read as binary)
/// @param out_file_name path of the output file; it is opened for writing before lexing starts
/// @param debug when true, prints the source text, recorded line lengths and tokens to stdout
/// @return true if all stages succeeded, false otherwise
bool Compiler_compileTasm(Compiler* cmp, cstr source_file_name, cstr out_file_name, bool debug){
    FILE* f = fopen(source_file_name, "rb");
    if(f == NULL)
        returnError("ERROR: can't open file '%s'", source_file_name);

    // read the whole source file into memory
    List_u8 buf = List_u8_alloc(64 * 1024);
    int ret;
    while((ret = fgetc(f)) != EOF) {
        List_u8_push(&buf, ret);
    }
    if(ferror(f)){
        free(buf.data);
        fclose(f);
        returnError("can't read file '%s'", source_file_name);
    }
    fclose(f);

    if(buf.len == 0){
        // the source stream is already closed above; it must not be closed twice
        free(buf.data);
        returnError("source file is empty");
    }

    // Terminate the text BEFORE taking the pointer: the push may move the list
    // storage (List implementation is not visible here), which would leave
    // cmp->code dangling if it was captured first.
    List_u8_push(&buf, 0);
    cmp->code = (char*)buf.data;
    cmp->code_len = buf.len - 1; // the terminator is not a part of the code

    f = fopen(out_file_name, "wb");
    if(f == NULL){
        // do not leave a pointer to freed memory inside the compiler
        cmp->code = NULL;
        cmp->code_len = 0;
        free(buf.data);
        returnError("ERROR: can't open file '%s'", out_file_name);
    }

    if(debug){
        printf("----------------------------------[%s]---------------------------------\n", source_file_name);
        fputs(cmp->code, stdout);
        fputc('\n', stdout);
    }

    bool success = lex(cmp);
    // the dump is printed even when lexing failed, to show what was recognized
    if(debug){
        printf("------------------------------------[lines]-----------------------------------\n");
        for(u32 i = 0; i < cmp->line_lengths.len; i++){
            printf("[%u] length: %u\n", i+1, cmp->line_lengths.data[i]);
        }
        printf("------------------------------------[tokens]-----------------------------------\n");
        for(u32 i = 0; i < cmp->tokens.len; i++){
            Token t = cmp->tokens.data[i];
            CodePos pos = Compiler_getLineAndColumn(cmp, t.begin);
            // The token text is printed straight from the code buffer with a
            // precision, so no temporary copy (and no fixed-size buffer that a
            // long token could overflow) is needed.
            printf("[l:%u, c:%u](pos:%u, size:%u) %s '%.*s'\n",
                pos.line, pos.column,
                t.begin, t.length,
                TokenType_toString(t.type), (int)t.length, cmp->code + t.begin);
        }
    }

    // each stage runs only if the previous one succeeded
    if(success)
        success = parse(cmp);
    if(success)
        success = compile(cmp, f);
    fclose(f);

    if(success){
        cmp->state = CompilerState_Success;
    }
    return success;
}
|
||||
|
||||
@ -7,27 +7,22 @@ List_declare(Token);
|
||||
|
||||
typedef enum CompilerState {
|
||||
CompilerState_Initial,
|
||||
CompilerState_LexemaRecognition,
|
||||
CompilerState_ReadingComment,
|
||||
CompilerState_ReadingLabel,
|
||||
CompilerState_ReadingInstruction,
|
||||
CompilerState_ReadingArguments,
|
||||
CompilerState_ReadingData,
|
||||
CompilerState_Lexing,
|
||||
CompilerState_Parsing,
|
||||
CompilerState_Compiling,
|
||||
CompilerState_Error,
|
||||
CompilerState_Success
|
||||
} CompilerState;
|
||||
|
||||
typedef struct Compiler {
|
||||
const char* code;
|
||||
size_t code_len;
|
||||
char* out_buffer;
|
||||
size_t out_len;
|
||||
|
||||
u32 line; // > 0 if code parsing started
|
||||
char* code;
|
||||
u32 code_len;
|
||||
u32 column; // > 0 if code parsing started
|
||||
NULLABLE(char* error_message);
|
||||
u32 pos;
|
||||
CompilerState state;
|
||||
NULLABLE(char* error_message);
|
||||
List_Token tokens;
|
||||
List_u32 line_lengths;
|
||||
} Compiler;
|
||||
|
||||
void Compiler_init(Compiler* cmp);
|
||||
@ -35,7 +30,18 @@ void Compiler_free(Compiler* cmp);
|
||||
|
||||
/// @brief compile assembly language code to machine code
|
||||
/// @return true if no errors, false if any error occurred (check cmp->error_message)
|
||||
bool Compiler_compileTasm(Compiler* cmp, const char* restrict code, size_t code_len, char* restrict out_buffer, size_t out_len);
|
||||
bool Compiler_compileTasm(Compiler* cmp, cstr source_file_name, cstr out_file_name, bool debug);
|
||||
|
||||
#define Compiler_setError(cmp, format, ...) _Compiler_setError(cmp, __func__, format ,##__VA_ARGS__)
|
||||
void _Compiler_setError(Compiler* cmp, const char* context, const char* format, ...) __attribute__((__format__(__printf__, 3, 4)));
|
||||
void _Compiler_setError(Compiler* cmp, cstr context, cstr format, ...) __attribute__((__format__(__printf__, 3, 4)));
|
||||
|
||||
typedef struct CodePos {
|
||||
u32 line; // 0 on error
|
||||
u32 column; // 0 on error
|
||||
} CodePos;
|
||||
|
||||
#define CodePos_create(L, C) ((CodePos){ .line = L, .column = C })
|
||||
|
||||
/// @param pos index in code buffer
|
||||
CodePos Compiler_getLineAndColumn(Compiler* cmp, u32 pos);
|
||||
|
||||
|
||||
15
src/compiler/token.c
Normal file
15
src/compiler/token.c
Normal file
@ -0,0 +1,15 @@
|
||||
#include "token.h"
|
||||
|
||||
static cstr TokenType_str[] = {
|
||||
"Unset",
|
||||
"SingleLineComment",
|
||||
"MultiLineComment",
|
||||
"Label",
|
||||
"Instruction",
|
||||
"Argument",
|
||||
"Data",
|
||||
};
|
||||
|
||||
cstr TokenType_toString(TokenType t){
|
||||
return TokenType_str[t];
|
||||
}
|
||||
@ -12,6 +12,8 @@ typedef enum TokenType {
|
||||
/* there is a place for 2 values left (TokenType must occupy 4 bits) */
|
||||
} TokenType;
|
||||
|
||||
cstr TokenType_toString(TokenType t);
|
||||
|
||||
typedef struct Token {
|
||||
u32 begin; // some index in Compiler->code
|
||||
u32 length : 28; // length in characters (28 bits)
|
||||
|
||||
@ -6,7 +6,7 @@
|
||||
typedef i32 (*InstructionImplFunc_t)(VM* vm);
|
||||
|
||||
typedef struct Instruction {
|
||||
const char* name;
|
||||
cstr name;
|
||||
InstructionImplFunc_t implementation;
|
||||
} Instruction;
|
||||
|
||||
|
||||
52
src/main.c
52
src/main.c
@ -1,13 +1,11 @@
|
||||
#include "VM/VM.h"
|
||||
#include "instructions/instructions.h"
|
||||
#include "collections/List.h"
|
||||
|
||||
List_declare(cstr);
|
||||
List_define(cstr);
|
||||
#include "compiler/compiler.h"
|
||||
|
||||
#define arg_is(STR) (strcmp(argv[argi], STR) == 0)
|
||||
|
||||
i32 compileSources(cstr out_file, List_cstr* sources);
|
||||
i32 compileSources(cstr source_file, cstr out_file);
|
||||
i32 bootFromImage(cstr image_file);
|
||||
|
||||
i32 main(const i32 argc, cstr* argv){
|
||||
@ -18,9 +16,10 @@ i32 main(const i32 argc, cstr* argv){
|
||||
|
||||
bool boot = false;
|
||||
cstr NULLABLE(image_file) = NULL;
|
||||
|
||||
bool compile = false;
|
||||
cstr NULLABLE(out_file) = NULL;
|
||||
List_cstr NULLABLE(sources) = List_cstr_construct(NULL, 0, 0);
|
||||
cstr NULLABLE(source_file) = NULL;
|
||||
|
||||
for(i32 argi = 1; argi < argc; argi++){
|
||||
if(arg_is("-h") || arg_is("--help")){
|
||||
@ -28,7 +27,7 @@ i32 main(const i32 argc, cstr* argv){
|
||||
"-h, --help Show this message.\n"
|
||||
"-op, --opcodes Show list of all instructions.\n"
|
||||
"-i, --image [FILE] Boot VM using image file.\n"
|
||||
"-c, --compile [OUT_FILE] [SOURCES...] Compile assembly source files to machine code.\n"
|
||||
"-c, --compile [SOURCE_FILE] [OUT_FILE] Compile assembly source files to machine code.\n"
|
||||
);
|
||||
return 0;
|
||||
}
|
||||
@ -46,28 +45,32 @@ i32 main(const i32 argc, cstr* argv){
|
||||
printfe("--image flag is set already\n");
|
||||
return 1;
|
||||
}
|
||||
|
||||
boot = true;
|
||||
if(++argi >= argc){
|
||||
printfe("ERROR: no image file specified\n");
|
||||
return 1;
|
||||
}
|
||||
image_file = argv[argi];
|
||||
boot = true;
|
||||
}
|
||||
else if(arg_is("-c") || arg_is("--compile")){
|
||||
if(compile){
|
||||
printfe("--compile flag is set already\n");
|
||||
return 1;
|
||||
}
|
||||
|
||||
compile = true;
|
||||
if(++argi >= argc){
|
||||
printfe("ERROR: no source file file specified\n");
|
||||
return 1;
|
||||
}
|
||||
source_file = argv[argi];
|
||||
|
||||
if(++argi >= argc){
|
||||
printfe("ERROR: no output file file specified\n");
|
||||
return 1;
|
||||
}
|
||||
out_file = argv[argi];
|
||||
sources = List_cstr_alloc(argc);
|
||||
compile = true;
|
||||
}
|
||||
else if(compile){
|
||||
List_cstr_push(&sources, argv[argi]);
|
||||
}
|
||||
else {
|
||||
printfe("ERROR: unknown argument '%s'\n", argv[argi]);
|
||||
@ -77,7 +80,7 @@ i32 main(const i32 argc, cstr* argv){
|
||||
|
||||
i32 exit_code = 0;
|
||||
if(compile){
|
||||
exit_code = compileSources(out_file, &sources);
|
||||
exit_code = compileSources(source_file, out_file);
|
||||
}
|
||||
if(exit_code == 0 && boot){
|
||||
exit_code = bootFromImage(image_file);
|
||||
@ -109,15 +112,15 @@ i32 bootFromImage(cstr image_file){
|
||||
VM_init(&vm);
|
||||
|
||||
i32 exit_code = 1;
|
||||
if(VM_loadProgram(&vm, vm_memory, bytes_read)){
|
||||
exit_code = VM_executeProgram(&vm);
|
||||
if(VM_setMemory(&vm, vm_memory, bytes_read)){
|
||||
exit_code = VM_boot(&vm);
|
||||
}
|
||||
if(vm.state == VMState_InternalError){
|
||||
if(vm.error_message){
|
||||
printfe("VM ERROR: %s\n", vm.error_message);
|
||||
free(vm.error_message);
|
||||
}
|
||||
else printfe("VM ERROR: unknown error (error_message is null)\n");
|
||||
else printfe("VM ERROR: unknown (error_message is null)\n");
|
||||
}
|
||||
|
||||
if(exit_code != 0){
|
||||
@ -128,6 +131,19 @@ i32 bootFromImage(cstr image_file){
|
||||
return exit_code;
|
||||
}
|
||||
|
||||
i32 compileSources(cstr out_file, List_cstr* sources){
|
||||
return -1;
|
||||
/// @brief Compiles one assembly source file to machine code (with debug output enabled).
/// @return 0 on success, 111 if the compiler reported an error
i32 compileSources(cstr source_file, cstr out_file){
    Compiler cmp;
    Compiler_init(&cmp);
    bool compiled = Compiler_compileTasm(&cmp, source_file, out_file, true);
    Compiler_free(&cmp);

    if(compiled)
        return 0;

    if(cmp.error_message == NULL){
        printfe("COMPILER ERROR: unknown (error_message is null)\n");
        return 111;
    }

    // the message is released here, after it has been printed
    printfe("COMPILER ERROR: %s\n", cmp.error_message);
    free(cmp.error_message);
    return 111;
}
|
||||
|
||||
14
src/std.h
14
src/std.h
@ -46,12 +46,12 @@ typedef const char* cstr;
|
||||
#define NULLABLE(NAME) NAME
|
||||
|
||||
#define strcat_malloc(STR0, ...) _strcat_malloc(count_args(__VA_ARGS__), STR0, __VA_ARGS__)
|
||||
char* _strcat_malloc(size_t n, const char* str0, ...);
|
||||
char* _vstrcat_malloc(size_t n, const char* str0, va_list argv);
|
||||
char* _strcat_malloc(size_t n, cstr str0, ...);
|
||||
char* _vstrcat_malloc(size_t n, cstr str0, va_list argv);
|
||||
|
||||
char* NULLABLE(sprintf_malloc)(size_t buffer_size, const char* format, ...) __attribute__((__format__(__printf__, 2, 3)));
|
||||
char* NULLABLE(vsprintf_malloc)(size_t buffer_size, const char* format, va_list argv);
|
||||
char* NULLABLE(sprintf_malloc)(size_t buffer_size, cstr format, ...) __attribute__((__format__(__printf__, 2, 3)));
|
||||
char* NULLABLE(vsprintf_malloc)(size_t buffer_size, cstr format, va_list argv);
|
||||
|
||||
static inline bool isAlphabeticalL(char c) { return c - 'a' <= 'z'; }
|
||||
static inline bool isAlphabeticalR(char c) { return c - 'A' <= 'Z'; }
|
||||
static inline bool isDigit(char c) { return c - '0' <= '9'; }
|
||||
/// @return true if c is in the range 'a'..'z'
static inline bool isAlphabeticalLower(char c) {
    if(c < 'a' || c > 'z')
        return false;
    return true;
}

/// @return true if c is in the range 'A'..'Z'
static inline bool isAlphabeticalUpper(char c) {
    if(c < 'A' || c > 'Z')
        return false;
    return true;
}

/// @return true if c is in the range '0'..'9'
static inline bool isDigit(char c) {
    if(c < '0' || c > '9')
        return false;
    return true;
}
|
||||
|
||||
Loading…
Reference in New Issue
Block a user