Compare commits
4 Commits
dbe8569a3b
...
83e28c9022
| Author | SHA1 | Date | |
|---|---|---|---|
| 83e28c9022 | |||
| 5c9197436f | |||
| f710aa4199 | |||
| facacc90f8 |
2
.vscode/launch.json
vendored
2
.vscode/launch.json
vendored
@ -7,7 +7,7 @@
|
||||
"request": "launch",
|
||||
"program": "${workspaceFolder}/bin/tcpu",
|
||||
"windows": { "program": "${workspaceFolder}/bin/tcpu.exe" },
|
||||
"args": [ "-c", "s.tasm", "o.bin" ],
|
||||
"args": [ "-c", "../examples/s.tasm", "o.bin", "--debug" ],
|
||||
"cwd": "${workspaceFolder}/bin",
|
||||
"preLaunchTask": "build_exec_dbg",
|
||||
"stopAtEntry": false,
|
||||
|
||||
16
examples/s.tasm
Normal file
16
examples/s.tasm
Normal file
@ -0,0 +1,16 @@
|
||||
/*
|
||||
"hello world" program in my assembly language
|
||||
*/
|
||||
|
||||
.data:
|
||||
// named array of 8-bit values
|
||||
const8 msg "Hello, World :3\0"
|
||||
|
||||
.main:
|
||||
push ax 1; // sys_write
|
||||
push bx 1; // stdout
|
||||
push cx @msg; // address of msg data
|
||||
push dx #msg; // size of msg data
|
||||
sys
|
||||
push ax 0
|
||||
exit
|
||||
@ -49,7 +49,7 @@ i32 VM_boot(VM* vm){
|
||||
while (vm->current_pos < vm->data_size){
|
||||
u8 opcode = vm->data[vm->current_pos];
|
||||
|
||||
const Instruction* instr = Instruction_getFromOpcode(opcode);
|
||||
const Instruction* instr = Instruction_getByOpcode(opcode);
|
||||
// printfe("[at 0x%x] %02X %s\n", (u32)vm->current_pos, opcode, instr->name);
|
||||
if(instr == NULL){
|
||||
VM_setError(vm, "unknown opcode %02X", opcode);
|
||||
|
||||
@ -16,22 +16,27 @@
|
||||
return List_##T##_construct((T*)(len > 0 ? malloc(len * sizeof(T)) : NULL), 0, 0);\
|
||||
}\
|
||||
\
|
||||
void List_##T##_push(List_##T* ptr, T value);
|
||||
T* List_##T##_expand(List_##T* ptr);\
|
||||
void List_##T##_push(List_##T* ptr, T value);\
|
||||
|
||||
|
||||
#define List_define(T)\
|
||||
void List_##T##_push(List_##T* ptr, T value){\
|
||||
u32 max_len = ptr->max_len;\
|
||||
if(ptr->len == max_len){\
|
||||
max_len = max_len * 1.5;\
|
||||
T* List_##T##_expand(List_##T* ptr){\
|
||||
if(ptr->len == ptr->max_len){\
|
||||
u32 max_len = ptr->max_len * 1.5;\
|
||||
max_len += __List_padding_in_sizeof_T(T);\
|
||||
/* branchless version of max(max_len, __List_min_size) */\
|
||||
max_len += (max_len < __List_min_size) * (__List_min_size - max_len);\
|
||||
ptr->data = (T*)realloc(ptr->data, max_len * sizeof(T));\
|
||||
ptr->max_len = max_len;\
|
||||
}\
|
||||
ptr->data[ptr->len++] = value;\
|
||||
}
|
||||
return &ptr->data[ptr->len++];\
|
||||
}\
|
||||
\
|
||||
void List_##T##_push(List_##T* ptr, T value){\
|
||||
T* empty_cell_ptr = List_##T##_expand(ptr);\
|
||||
*empty_cell_ptr = value;\
|
||||
}\
|
||||
|
||||
#define __List_min_size 16
|
||||
|
||||
|
||||
45
src/compiler/AST.c
Normal file
45
src/compiler/AST.c
Normal file
@ -0,0 +1,45 @@
|
||||
#include "AST.h"
|
||||
|
||||
List_define(Argument);
|
||||
List_define(Operation);
|
||||
List_define(DataDefinition);
|
||||
|
||||
static cstr _ArgumentType_str[] = {
|
||||
"Unset",
|
||||
"Register",
|
||||
"ConstValue",
|
||||
"DataName",
|
||||
"NamedDataPointer",
|
||||
"NamedDataSize",
|
||||
};
|
||||
|
||||
cstr ArgumentType_toString(ArgumentType t){
|
||||
if(t >= ARRAY_SIZE(_ArgumentType_str))
|
||||
return "!!INDEX_ERROR!!";
|
||||
return _ArgumentType_str[t];
|
||||
}
|
||||
|
||||
|
||||
/// @brief prepare a section: store its name and allocate its data and code lists
/// @param name stored as-is (not copied); Section_free() passes it to free()
void Section_init(Section* sec, char* name){
    List_DataDefinition data_defs = List_DataDefinition_alloc(256);
    List_Operation operations = List_Operation_alloc(1024);

    sec->name = name;
    sec->data = data_defs;
    sec->code = operations;
}
|
||||
|
||||
/// @brief release the name string and the backing arrays of both lists
// NOTE(review): only the list arrays themselves are freed here; anything the
// individual DataDefinition / Operation elements point to is not touched.
// Confirm who owns those allocations.
void Section_free(Section* sec){
    void* owned[] = { sec->name, sec->data.data, sec->code.data };
    for(size_t i = 0; i < sizeof(owned) / sizeof(owned[0]); i++){
        free(owned[i]);
    }
}
|
||||
|
||||
|
||||
/// @brief allocate the section list of an AST (requested capacity: 32 sections)
void AST_init(AST* ast){
    List_Section sections = List_Section_alloc(32);
    ast->sections = sections;
}
|
||||
|
||||
/// @brief free every stored section, then the section array itself
void AST_free(AST* ast){
    Section* sections = ast->sections.data;
    const u32 section_count = ast->sections.len;

    for(u32 idx = 0; idx < section_count; idx++){
        Section_free(&sections[idx]);
    }
    free(sections);
}
|
||||
@ -4,36 +4,57 @@
|
||||
#include "../collections/List.h"
|
||||
|
||||
typedef enum ArgumentType {
|
||||
ArgumentType_NoArgument,
|
||||
ArgumentType_Unset,
|
||||
ArgumentType_Register,
|
||||
ArgumentType_ConstValue,
|
||||
ArgumentType_Name,
|
||||
ArgumentType_DataName,
|
||||
ArgumentType_NamedDataPointer,
|
||||
ArgumentType_NamedDataSize,
|
||||
};
|
||||
} ArgumentType;
|
||||
|
||||
cstr ArgumentType_toString(ArgumentType t);
|
||||
|
||||
|
||||
typedef struct Argument {
|
||||
ArgumentType type;
|
||||
u32 value;
|
||||
} Argument;
|
||||
|
||||
List_declare(Argument);
|
||||
|
||||
|
||||
typedef struct Operation {
|
||||
List_Argument args;
|
||||
Opcode op;
|
||||
} Operation;
|
||||
|
||||
List_declare(Operation);
|
||||
|
||||
|
||||
typedef struct DataDefinition {
|
||||
u8 element_size;
|
||||
u16 size;
|
||||
void* data;
|
||||
u32 element_size;
|
||||
u32 count;
|
||||
cstr name;
|
||||
};
|
||||
void* data;
|
||||
} DataDefinition;
|
||||
|
||||
List_declare(DataDefinition);
|
||||
|
||||
|
||||
typedef struct Section {
|
||||
char* name;
|
||||
List_DataDefinition data;
|
||||
List_Operation code;
|
||||
} Section;
|
||||
|
||||
List_declare(Section);
|
||||
|
||||
void Section_init(Section* Section, char* name);
|
||||
void Section_free(Section* Section);
|
||||
|
||||
typedef struct AST {
|
||||
DataDefinition
|
||||
List_Section sections;
|
||||
} AST;
|
||||
|
||||
/*
|
||||
d8 name ''
|
||||
|
||||
goto label
|
||||
.label:
|
||||
code...
|
||||
*/
|
||||
void AST_init(AST* ast);
|
||||
void AST_free(AST* ast);
|
||||
|
||||
149
src/compiler/Compiler.c
Normal file
149
src/compiler/Compiler.c
Normal file
@ -0,0 +1,149 @@
|
||||
#include "Compiler_internal.h"
|
||||
|
||||
|
||||
/// @brief bring a Compiler into its initial state
void Compiler_init(Compiler* cmp){
    // zero everything first so that pointers and counters start out as NULL / 0
    memset(cmp, 0, sizeof *cmp);

    cmp->state = CompilerState_Initial;
    cmp->tokens = List_Token_alloc(4096);
    cmp->line_lengths = List_u32_alloc(1024);
    AST_init(&cmp->ast);
}
|
||||
|
||||
/// @brief release the source buffer, the token and line-length arrays, and the AST
// NOTE(review): cmp->error_message is not released here; confirm that the
// caller frees it.
void Compiler_free(Compiler* cmp){
    char* source = cmp->code;
    free(source);

    free(cmp->tokens.data);
    free(cmp->line_lengths.data);

    AST_free(&cmp->ast);
}
|
||||
|
||||
/// @brief translate an index in the code buffer to a 1-based line and column
/// @param pos index in code buffer
/// @return CodePos with line == 0 and column == 0 when `pos` is outside the code
///         or is not covered by the recorded line lengths
CodePos Compiler_getLineAndColumn(Compiler* cmp, u32 pos){
    if(pos < cmp->code_len){
        u32 line_start = 0; // sum of the lengths of all lines before the current one
        u32 line_idx = 0;
        while(line_idx < cmp->line_lengths.len){
            u32 line_end = line_start + cmp->line_lengths.data[line_idx];
            if(pos < line_end)
                return CodePos_create(line_idx + 1, pos + 1 - line_start);
            line_start = line_end;
            line_idx++;
        }
    }

    return CodePos_create(0, 0);
}
|
||||
|
||||
/// @brief switch the compiler to the error state and store a formatted message
///        of the form "[at LINE:COLUMN][CONTEXT] MESSAGE" in cmp->error_message
/// @param context text placed in the second bracket pair (the Compiler_setError
///        macro passes __func__)
/// @param format printf-style format followed by its arguments
/// The stored message is heap-allocated; it is NULL only if every allocation failed.
void _Compiler_setError(Compiler* cmp, cstr context, cstr format, ...){
    // happens at the end of file
    // (the code_len check keeps pos from wrapping around when no code is loaded yet)
    if(cmp->code_len > 0 && cmp->pos >= cmp->code_len)
        cmp->pos = cmp->code_len - 1;

    char position_str[32];
    CodePos code_pos = Compiler_getLineAndColumn(cmp, cmp->pos);
    snprintf(position_str, sizeof(position_str), "[at %u:%u][", code_pos.line, code_pos.column);
    char* real_format = strcat_malloc(position_str, context, "] ", format);

    va_list argv;
    va_start(argv, format);
    char* NULLABLE(buf) = vsprintf_malloc(512, real_format, argv);
    va_end(argv);
    free(real_format);

    if(buf == NULL){
        // formatting failed: fall back to a fixed message, but only if it can be allocated
        static const char fallback[] = "SPRINTF FAILED";
        buf = malloc(sizeof(fallback));
        if(buf != NULL)
            memcpy(buf, fallback, sizeof(fallback));
    }

    cmp->state = CompilerState_Error;
    cmp->error_message = buf;
}
|
||||
|
||||
// Records an error on the local variable `cmp`.
// Wrapped in do/while(0) so that `setError(...);` is a single statement and
// can be followed by `else` without a syntax error.
#define setError(FORMAT, ...) do {\
    Compiler_setError(cmp, FORMAT, ##__VA_ARGS__);\
} while(0)
|
||||
|
||||
/// @brief copy the source text of a token into a new NUL-terminated string
/// @param t token whose `begin` / `length` select a range of cmp->code
/// @return heap-allocated copy that the caller must free(), or NULL if allocation failed
char* Compiler_extractTokenStr(Compiler* cmp, Token t){
    char* s = malloc(t.length + 1);
    if(s == NULL)
        return NULL;
    // the token text starts at t.begin, not at the beginning of the code buffer
    memcpy(s, cmp->code + t.begin, t.length);
    s[t.length] = 0;
    return s;
}
|
||||
|
||||
/// @brief final stage: requires the Parsing state and switches to Compiling
/// @param f output file; this function does not write to it
/// @return false (with an error recorded) if the compiler is not in the Parsing state
static bool compileFile(Compiler* cmp, FILE* f){
    (void)f;

    returnErrorIf_auto(cmp->state != CompilerState_Parsing);
    cmp->state = CompilerState_Compiling;

    return true;
}
|
||||
|
||||
/// @brief read a source file, then run the lex, parse and compile stages
/// @param source_file_name file to read; its whole content is loaded into cmp->code
/// @param out_file_name file opened for binary writing and handed to the compile stage
/// @param debug_log when true, print the source, the line lengths and the tokens to stdout
/// @return true if no errors, false if any error occurred (check cmp->error_message)
bool Compiler_compile(Compiler* cmp, cstr source_file_name, cstr out_file_name, bool debug_log){
    FILE* f = fopen(source_file_name, "rb");
    if(f == NULL)
        returnError("ERROR: can't open file '%s'", source_file_name);

    List_u8 buf = List_u8_alloc(64 * 1024);
    int ret;
    while((ret = fgetc(f)) != EOF) {
        List_u8_push(&buf, ret);
    }
    if(ferror(f)){
        free(buf.data);
        fclose(f);
        returnError("can't read file '%s'", source_file_name);
    }
    fclose(f);

    if(buf.len == 0){
        // the source stream is already closed above; it must not be closed twice
        free(buf.data);
        returnError("soucre file is empty");
    }

    // Append the terminator BEFORE storing the pointer: the push may grow the
    // list and move its storage, which would leave an earlier copy dangling.
    u32 code_len = buf.len;
    List_u8_push(&buf, 0);
    cmp->code = (char*)buf.data;
    cmp->code_len = code_len;

    f = fopen(out_file_name, "wb");
    if(f == NULL){
        // drop the buffer and forget the pointer so Compiler_free() cannot free it again
        free(cmp->code);
        cmp->code = NULL;
        cmp->code_len = 0;
        returnError("ERROR: can't open file '%s'", out_file_name);
    }

    if(debug_log){
        printf("----------------------------------[%s]---------------------------------\n", source_file_name);
        fputs(cmp->code, stdout);
        fputc('\n', stdout);
    }

    bool success = Compiler_lex(cmp);
    if(debug_log){
        printf("------------------------------------[lines]-----------------------------------\n");
        for(u32 i = 0; i < cmp->line_lengths.len; i++){
            printf("[%u] length: %u\n", i+1, cmp->line_lengths.data[i]);
        }
        printf("------------------------------------[tokens]-----------------------------------\n");
        for(u32 i = 0; i < cmp->tokens.len; i++){
            Token t = cmp->tokens.data[i];
            CodePos pos = Compiler_getLineAndColumn(cmp, t.begin);
            // print the token text straight from the code buffer; the precision
            // limits the output to the token, so no size-limited scratch copy is needed
            printf("[l:%3u, c:%3u] %s '%.*s'\n",
                pos.line, pos.column,
                TokenType_toString(t.type), (int)t.length, cmp->code + t.begin);
        }
    }
    if(!success){
        fclose(f);
        return false;
    }

    success = Compiler_parse(cmp);
    if(!success){
        fclose(f);
        return false;
    }

    success = compileFile(cmp, f);
    fclose(f);
    if(success){
        cmp->state = CompilerState_Success;
    }

    return success;
}
|
||||
263
src/compiler/Lexer.c
Normal file
263
src/compiler/Lexer.c
Normal file
@ -0,0 +1,263 @@
|
||||
#include "Compiler_internal.h"
|
||||
|
||||
// Lexer flavour of setError: closes the current line first (so the error
// position can be resolved through the recorded line lengths), then records
// the error on the local variable `cmp`.
// Wrapped in do/while(0) so that `setError(...);` is a single statement and
// can be followed by `else` without a syntax error.
#define setError(FORMAT, ...) do {\
    completeLine(cmp);\
    Compiler_setError(cmp, FORMAT, ##__VA_ARGS__);\
} while(0)
|
||||
|
||||
#define Error_unexpectedCharacter(C) "unexpected character '%c'", C
|
||||
#define Error_endOfFile "unexpected end of file"
|
||||
|
||||
/// @brief record the current column counter as the length of a finished line
///        and reset the counter to 0
static void completeLine(Compiler* cmp){
    u32 finished_len = cmp->column;
    cmp->column = 0;
    List_u32_push(&cmp->line_lengths, finished_len);
}
|
||||
|
||||
/// @brief read a `//` comment up to (not including) the line break or the end of file
/// On entry cmp->pos is at the second '/'; the token starts one character earlier.
static void readCommentSingleLine(Compiler* cmp){
    Token tok = Token_construct(TokenType_SingleLineComment, cmp->pos - 1, 0);

    // step over the second '/', then keep going until a line break or the end of the code;
    // the line break itself is left for Compiler_lex(), which completes the line
    do {
        cmp->column++;
        cmp->pos++;
    } while(cmp->pos < cmp->code_len
        && cmp->code[cmp->pos] != '\r'
        && cmp->code[cmp->pos] != '\n');

    tok.length = cmp->pos - tok.begin;
    List_Token_push(&cmp->tokens, tok);
}
|
||||
|
||||
/// @brief read a multi-line comment, including both delimiters, into one token
/// On entry cmp->pos is at the '*' of the opening delimiter; the token starts
/// one character earlier. On success cmp->pos is left on the closing '/'.
/// Reaching the end of the code without a closing delimiter is an error.
static void readCommentMultiLine(Compiler* cmp){
    char c; // '*'
    Token tok = Token_construct(TokenType_MultiLineComment, cmp->pos - 1, 0);
    cmp->column++;
    cmp->pos++;

    while(cmp->pos < cmp->code_len){
        c = cmp->code[cmp->pos];
        // closing comment
        // The closing '/' can sit at tok.begin + 3 at the earliest (the empty
        // comment: slash, star, star, slash). The bound also prevents the '*'
        // of the opening delimiter from being reused as part of the closing one.
        if(cmp->pos >= tok.begin + 3 && c == '/' && cmp->code[cmp->pos - 1] == '*') {
            tok.length = cmp->pos - tok.begin + 1;
            List_Token_push(&cmp->tokens, tok);
            return;
        }

        if(c == '\n')
            completeLine(cmp);
        cmp->column++;
        cmp->pos++;
    }

    // end of file
    setError(Error_endOfFile);
}
|
||||
|
||||
/// @brief decide which kind of comment starts at cmp->pos and read it
/// On entry cmp->pos is at a '/'. The next character must be '/' (single-line
/// comment) or '*' (multi-line comment); anything else is reported as an error.
static void readComment(Compiler* cmp){
    char c; // '/'
    if(cmp->pos + 1 == cmp->code_len){
        setError(Error_endOfFile);
        return;
    }

    c = cmp->code[cmp->pos + 1];
    if(c == '\r' || c == '\n'){
        // A lone '/' at the end of a line: report the '/' itself. cmp->pos has
        // not been advanced yet, so stepping back would report the previous
        // character and would read before the buffer when '/' is at offset 0.
        setError(Error_unexpectedCharacter(cmp->code[cmp->pos]));
        return;
    }

    cmp->pos++;
    cmp->column++;
    if(c == '/')
        readCommentSingleLine(cmp);
    else if(c == '*')
        readCommentMultiLine(cmp);
    else setError(Error_unexpectedCharacter(c));
}
|
||||
|
||||
/// @brief read a label name; the token covers the name without the leading '.'
/// On entry cmp->pos is at the '.'. The name ends at ':', a line break or the
/// end of the code, and may contain letters, digits, '_' and '.'.
/// An empty name is reported as an error.
static void readLabel(Compiler* cmp){
    // skip the '.'
    cmp->pos++;
    cmp->column++;
    Token tok = Token_construct(TokenType_Label, cmp->pos, 0);

    for(; cmp->pos < cmp->code_len; cmp->column++, cmp->pos++){
        char c = cmp->code[cmp->pos];

        bool name_ended = (c == ':' || c == '\r' || c == '\n');
        if(name_ended){
            tok.length = cmp->pos - tok.begin;
            if(tok.length == 0){
                // nothing after the '.': step back and report the '.' itself
                setError(Error_unexpectedCharacter(cmp->code[--cmp->pos]));
                return;
            }
            List_Token_push(&cmp->tokens, tok);
            // the line break (if any) is handled by Compiler_lex()
            return;
        }

        bool name_char = isAlphabeticalLower(c) || isAlphabeticalUpper(c) || isDigit(c)
            || c == '_' || c == '.';
        if(!name_char){
            setError(Error_unexpectedCharacter(c));
            return;
        }
    }

    // end of file
    tok.length = cmp->pos - tok.begin;
    if(tok.length == 0){
        setError(Error_endOfFile);
        return;
    }
    List_Token_push(&cmp->tokens, tok);
}
|
||||
|
||||
/// @brief read the arguments that follow an instruction, one token per argument
/// On entry cmp->pos is at the blank character after the instruction name.
/// Arguments are separated by spaces or tabs; a line break or ';' ends the list.
/// The token type is chosen from the first character of each argument:
/// '\'' -> Char, '"' -> String, '@' -> NamedDataPointer, '#' -> NamedDataSize,
/// digit -> Number, anything else -> Name.
/// Inside a quoted argument blanks and ';' do not split the token; a line break
/// or the end of the code before the closing quote is an error.
static void readArguments(Compiler* cmp){
    char c; // space
    Token tok = Token_construct(TokenType_Unset, cmp->pos, 0);
    char quot = '\0'; // quotation character of a string value

    while(cmp->pos < cmp->code_len){
        c = cmp->code[cmp->pos];

        // string argument reading
        if(quot != '\0'){
            // NOTE(review): only the single preceding character is checked, so a
            // quote that follows an escaped backslash is still treated as escaped.
            if(c == quot && cmp->code[cmp->pos - 1] != '\\'){
                quot = '\0';
            }
            else if(c == '\r' || c == '\n'){
                setError("line end reached but string hasn't been closed yet");
                return;
            }
        }

        // end of line
        else if(c == '\r' || c == '\n' || c == ';'){
            tok.length = cmp->pos - tok.begin;
            if(tok.length > 0)
                List_Token_push(&cmp->tokens, tok);
            // the line break (if any) is handled by Compiler_lex()
            return;
        }

        // new argument begins
        else if(c == ' ' || c == '\t'){
            tok.length = cmp->pos - tok.begin;
            if(tok.length > 0)
                List_Token_push(&cmp->tokens, tok);
            tok = Token_construct(TokenType_Unset, cmp->pos + 1, 0);
        }

        // first character of an argument decides its type
        else if(tok.type == TokenType_Unset){
            if(c == '\''){
                tok.type = TokenType_Char;
                quot = c;
            }
            else if(c == '"'){
                tok.type = TokenType_String;
                quot = c;
            }
            else if(c == '@')
                tok.type = TokenType_NamedDataPointer;
            else if(c == '#')
                tok.type = TokenType_NamedDataSize;
            else if(isDigit(c))
                tok.type = TokenType_Number;
            else tok.type = TokenType_Name;
        }

        cmp->column++;
        cmp->pos++;
    }

    // end of file
    if(quot != '\0'){
        // same condition as the line-end case above: the quote was never closed
        setError("end of file reached but string hasn't been closed yet");
        return;
    }
    tok.length = cmp->pos - tok.begin;
    if(tok.length > 0)
        List_Token_push(&cmp->tokens, tok);
}
|
||||
|
||||
/// @brief read an instruction name and, if a blank follows it, its arguments
/// On entry cmp->pos is at the first character of the name. The name may
/// contain letters and digits and ends at a blank, ';', a line break or the
/// end of the code.
static void readInstruction(Compiler* cmp){
    Token tok = Token_construct(TokenType_Instruction, cmp->pos, 0);
    // the first character was already validated by the caller
    cmp->column++;
    cmp->pos++;

    for(; cmp->pos < cmp->code_len; cmp->column++, cmp->pos++){
        char c = cmp->code[cmp->pos];

        bool statement_ends = (c == '\r' || c == '\n' || c == ';');
        bool arguments_follow = !statement_ends && (c == ' ' || c == '\t');
        if(statement_ends || arguments_follow){
            tok.length = cmp->pos - tok.begin;
            List_Token_push(&cmp->tokens, tok);
            if(arguments_follow)
                readArguments(cmp);
            // the line break (if any) is handled by Compiler_lex()
            return;
        }

        bool name_char = isAlphabeticalLower(c) || isAlphabeticalUpper(c) || isDigit(c);
        if(!name_char){
            setError(Error_unexpectedCharacter(c));
            return;
        }
    }

    // end of file
    tok.length = cmp->pos - tok.begin;
    List_Token_push(&cmp->tokens, tok);
}
|
||||
|
||||
/// @brief split cmp->code into tokens and record the length of every line
/// Requires the Initial state and switches to Lexing. Blank characters are
/// skipped, '/' starts a comment, '.' starts a label and a letter starts an
/// instruction; any other character is an error.
/// @return true if no errors, false if any error occurred (check cmp->error_message)
bool Compiler_lex(Compiler* cmp){
    returnErrorIf_auto(cmp->state != CompilerState_Initial);
    cmp->state = CompilerState_Lexing;
    cmp->column = 1;

    for(; cmp->pos < cmp->code_len; cmp->column++, cmp->pos++){
        char c = cmp->code[cmp->pos];

        if(c == ' ' || c == '\t' || c == '\r' || c == '\n'){
            // skip blank characters
        }
        else if(c == '/')
            readComment(cmp);
        else if(c == '.')
            readLabel(cmp);
        else if(isAlphabeticalLower(c) || isAlphabeticalUpper(c))
            readInstruction(cmp);
        else returnError(Error_unexpectedCharacter(c));

        if(cmp->state == CompilerState_Error)
            return false;

        // the readers stop ON the character that ended their token, so look at
        // the current position again to detect the end of a line
        if(cmp->code[cmp->pos] == '\n')
            completeLine(cmp);
    }

    // close the last line
    completeLine(cmp);
    return true;
}
|
||||
@ -1,470 +0,0 @@
|
||||
#include "Lexer.h"
|
||||
|
||||
List_define(Token);
|
||||
|
||||
CodePos Lexer_getLineAndColumn(Lexer* cmp, u32 pos){
|
||||
u32 prev_lines_len = 0;
|
||||
if(pos >= cmp->code_len)
|
||||
return CodePos_create(0, 0);
|
||||
|
||||
for(u32 i = 0; i < cmp->line_lengths.len; i++){
|
||||
u32 line_len = cmp->line_lengths.data[i];
|
||||
if(prev_lines_len + line_len > pos)
|
||||
return CodePos_create(i + 1, pos + 1 - prev_lines_len);
|
||||
prev_lines_len += line_len;
|
||||
}
|
||||
|
||||
return CodePos_create(0, 0);
|
||||
}
|
||||
|
||||
static void completeLine(Lexer* cmp){
|
||||
List_u32_push(&cmp->line_lengths, cmp->column);
|
||||
cmp->column = 0;
|
||||
}
|
||||
|
||||
void _Lexer_setError(Lexer* cmp, cstr context, cstr format, ...){
|
||||
completeLine(cmp);
|
||||
// happens at the end of file
|
||||
if(cmp->pos >= cmp->code_len)
|
||||
cmp->pos = cmp->code_len - 1;
|
||||
char position_str[32];
|
||||
CodePos code_pos = Lexer_getLineAndColumn(cmp, cmp->pos);
|
||||
sprintf(position_str, "[at %u:%u][", code_pos.line, code_pos.column);
|
||||
char* real_format = strcat_malloc(position_str, context, "] ", format);
|
||||
va_list argv;
|
||||
va_start(argv, format);
|
||||
char* NULLABLE(buf) = vsprintf_malloc(512, real_format, argv);
|
||||
va_end(argv);
|
||||
free(real_format);
|
||||
if(buf == NULL){
|
||||
buf = malloc(16);
|
||||
strcpy(buf, "SPRINTF FAILED");
|
||||
}
|
||||
cmp->state = LexerState_Error;
|
||||
cmp->error_message = buf;
|
||||
}
|
||||
|
||||
|
||||
#define setError(FORMAT, ...) Lexer_setError(cmp, FORMAT, ##__VA_ARGS__)
|
||||
|
||||
#define returnError(FORMAT, ...) {\
|
||||
setError(FORMAT, ##__VA_ARGS__);\
|
||||
return false;\
|
||||
}
|
||||
|
||||
#define returnErrorIf(STATEMENT, FORMAT, ...) if(STATEMENT) returnError(FORMAT, ##__VA_ARGS__)
|
||||
|
||||
#define returnErrorIf_auto(STATEMENT) returnErrorIf(STATEMENT, #STATEMENT)
|
||||
|
||||
#define Error_unexpectedCharacter(C) "unexpected character '%c'", C
|
||||
#define Error_endOfFile "unexpected end of file"
|
||||
|
||||
void Lexer_init(Lexer* cmp){
|
||||
memset(cmp, 0, sizeof(Lexer));
|
||||
cmp->state = LexerState_Initial;
|
||||
cmp->tokens = List_Token_alloc(4096);
|
||||
cmp->line_lengths = List_u32_alloc(1024);
|
||||
}
|
||||
|
||||
void Lexer_free(Lexer* cmp){
|
||||
free(cmp->code);
|
||||
free(cmp->tokens.data);
|
||||
free(cmp->line_lengths.data);
|
||||
}
|
||||
|
||||
static void readCommentSingleLine(Lexer* cmp){
|
||||
char c; // '/'
|
||||
Token tok = Token_construct(TokenType_SingleLineComment, cmp->pos - 1, 0);
|
||||
cmp->column++;
|
||||
cmp->pos++;
|
||||
|
||||
while(cmp->pos < cmp->code_len){
|
||||
c = cmp->code[cmp->pos];
|
||||
// end of line
|
||||
if(c == '\r' || c == '\n'){
|
||||
tok.length = cmp->pos - tok.begin;
|
||||
List_Token_push(&cmp->tokens, tok);
|
||||
// cmp->line will be increased in lex()
|
||||
return;
|
||||
}
|
||||
|
||||
cmp->column++;
|
||||
cmp->pos++;
|
||||
}
|
||||
|
||||
// end of file
|
||||
tok.length = cmp->pos - tok.begin;
|
||||
List_Token_push(&cmp->tokens, tok);
|
||||
}
|
||||
|
||||
static void readCommentMultiLine(Lexer* cmp){
|
||||
char c; // '*'
|
||||
Token tok = Token_construct(TokenType_MultiLineComment, cmp->pos - 1, 0);
|
||||
cmp->column++;
|
||||
cmp->pos++;
|
||||
|
||||
while(cmp->pos < cmp->code_len){
|
||||
c = cmp->code[cmp->pos];
|
||||
// closing comment
|
||||
if(cmp->pos > tok.begin + 3 && c == '/' && cmp->code[cmp->pos - 1] == '*') {
|
||||
tok.length = cmp->pos - tok.begin + 1;
|
||||
List_Token_push(&cmp->tokens, tok);
|
||||
return;
|
||||
}
|
||||
|
||||
if(c == '\n')
|
||||
completeLine(cmp);
|
||||
cmp->column++;
|
||||
cmp->pos++;
|
||||
}
|
||||
|
||||
// end of file
|
||||
setError(Error_endOfFile);
|
||||
}
|
||||
|
||||
static void readComment(Lexer* cmp){
|
||||
char c; // '/'
|
||||
if(cmp->pos + 1 == cmp->code_len){
|
||||
setError(Error_endOfFile);
|
||||
return;
|
||||
}
|
||||
|
||||
c = cmp->code[cmp->pos + 1];
|
||||
if(c == '\r' || c == '\n'){
|
||||
setError(Error_unexpectedCharacter(cmp->code[--cmp->pos]));
|
||||
return;
|
||||
}
|
||||
|
||||
cmp->pos++;
|
||||
cmp->column++;
|
||||
if(c == '/')
|
||||
readCommentSingleLine(cmp);
|
||||
else if(c == '*')
|
||||
readCommentMultiLine(cmp);
|
||||
else setError(Error_unexpectedCharacter(c));
|
||||
}
|
||||
|
||||
static void readLabel(Lexer* cmp){
|
||||
char c; // '.'
|
||||
cmp->pos++;
|
||||
cmp->column++;
|
||||
Token tok = Token_construct(TokenType_Label, cmp->pos, 0);
|
||||
|
||||
while(cmp->pos < cmp->code_len){
|
||||
c = cmp->code[cmp->pos];
|
||||
// end of line
|
||||
if(c == ':' || c == '\r' || c == '\n'){
|
||||
tok.length = cmp->pos - tok.begin;
|
||||
if(tok.length > 0)
|
||||
List_Token_push(&cmp->tokens, tok);
|
||||
else setError(Error_unexpectedCharacter(cmp->code[--cmp->pos]));
|
||||
// cmp->line will be increased in lex()
|
||||
return;
|
||||
}
|
||||
|
||||
if(!isAlphabeticalLower(c) && !isAlphabeticalUpper(c) && !isDigit(c) &&
|
||||
c != '_' && c != '.'){
|
||||
setError(Error_unexpectedCharacter(c));
|
||||
return;
|
||||
}
|
||||
|
||||
cmp->column++;
|
||||
cmp->pos++;
|
||||
}
|
||||
|
||||
// end of file
|
||||
tok.length = cmp->pos - tok.begin;
|
||||
if(tok.length > 0)
|
||||
List_Token_push(&cmp->tokens, tok);
|
||||
else setError(Error_endOfFile);
|
||||
}
|
||||
|
||||
static void readArguments(Lexer* cmp){
|
||||
char c; // space
|
||||
cmp->pos++;
|
||||
cmp->column++;
|
||||
Token tok = Token_construct(TokenType_Argument, cmp->pos, 0);
|
||||
|
||||
while(cmp->pos < cmp->code_len){
|
||||
c = cmp->code[cmp->pos];
|
||||
// end of line
|
||||
if(c == '\r' || c == '\n' || c == ';'){
|
||||
tok.length = cmp->pos - tok.begin;
|
||||
if(tok.length > 0)
|
||||
List_Token_push(&cmp->tokens, tok);
|
||||
// cmp->line will be increased in lex()
|
||||
return;
|
||||
}
|
||||
|
||||
// new argument begins
|
||||
if(c == ' ' || c == '\t'){
|
||||
tok.length = cmp->pos - tok.begin;
|
||||
if(tok.length > 0)
|
||||
List_Token_push(&cmp->tokens, tok);
|
||||
tok.begin = cmp->pos + 1;
|
||||
}
|
||||
|
||||
|
||||
cmp->column++;
|
||||
cmp->pos++;
|
||||
}
|
||||
|
||||
// end of file
|
||||
tok.length = cmp->pos - tok.begin;
|
||||
if(tok.length > 0)
|
||||
List_Token_push(&cmp->tokens, tok);
|
||||
}
|
||||
|
||||
static void readInstruction(Lexer* cmp){
|
||||
Token tok = Token_construct(TokenType_Instruction, cmp->pos, 0);
|
||||
cmp->pos++;
|
||||
cmp->column++;
|
||||
|
||||
while(cmp->pos < cmp->code_len){
|
||||
char c = cmp->code[cmp->pos];
|
||||
// end of line
|
||||
if(c == '\r' || c == '\n' || c == ';'){
|
||||
tok.length = cmp->pos - tok.begin;
|
||||
List_Token_push(&cmp->tokens, tok);
|
||||
// cmp->line will be increased in lex()
|
||||
return;
|
||||
}
|
||||
|
||||
// arguments begin
|
||||
if(c == ' ' || c == '\t'){
|
||||
tok.length = cmp->pos - tok.begin;
|
||||
List_Token_push(&cmp->tokens, tok);
|
||||
readArguments(cmp);
|
||||
return;
|
||||
}
|
||||
|
||||
if(!isAlphabeticalLower(c) && !isAlphabeticalUpper(c) && !isDigit(c)){
|
||||
setError(Error_unexpectedCharacter(c));
|
||||
return;
|
||||
}
|
||||
|
||||
cmp->column++;
|
||||
cmp->pos++;
|
||||
}
|
||||
|
||||
// end of file
|
||||
tok.length = cmp->pos - tok.begin;
|
||||
List_Token_push(&cmp->tokens, tok);
|
||||
}
|
||||
|
||||
static void readChar(Lexer* cmp){
|
||||
Token tok = Token_construct(TokenType_Char, cmp->pos, 0);
|
||||
cmp->pos++;
|
||||
cmp->column++;
|
||||
|
||||
while(cmp->pos < cmp->code_len){
|
||||
char c = cmp->code[cmp->pos];
|
||||
// end of line
|
||||
if(c == '\r' || c == '\n'){
|
||||
setError(Error_unexpectedCharacter(cmp->code[--cmp->pos]));
|
||||
return;
|
||||
}
|
||||
|
||||
if(c == '\''){
|
||||
tok.length = cmp->pos - tok.begin + 1;
|
||||
List_Token_push(&cmp->tokens, tok);
|
||||
return;
|
||||
}
|
||||
|
||||
cmp->column++;
|
||||
cmp->pos++;
|
||||
}
|
||||
|
||||
// end of file
|
||||
setError(Error_endOfFile);
|
||||
}
|
||||
|
||||
static void readString(Lexer* cmp){
|
||||
Token tok = Token_construct(TokenType_String, cmp->pos, 0);
|
||||
cmp->pos++;
|
||||
cmp->column++;
|
||||
|
||||
while(cmp->pos < cmp->code_len){
|
||||
char c = cmp->code[cmp->pos];
|
||||
// end of line
|
||||
if(c == '\r' || c == '\n'){
|
||||
setError(Error_unexpectedCharacter(cmp->code[--cmp->pos]));
|
||||
return;
|
||||
}
|
||||
|
||||
if(c == '"'){
|
||||
tok.length = cmp->pos - tok.begin + 1;
|
||||
List_Token_push(&cmp->tokens, tok);
|
||||
return;
|
||||
}
|
||||
|
||||
cmp->column++;
|
||||
cmp->pos++;
|
||||
}
|
||||
|
||||
// end of file
|
||||
setError(Error_endOfFile);
|
||||
}
|
||||
|
||||
static void readNumber(Lexer* cmp){
|
||||
Token tok = Token_construct(TokenType_Number, cmp->pos, 0);
|
||||
cmp->pos++;
|
||||
cmp->column++;
|
||||
|
||||
while(cmp->pos < cmp->code_len){
|
||||
char c = cmp->code[cmp->pos];
|
||||
|
||||
if(c == '\r' || c == '\n' || c == ' ' || c == '\t' || c == ',' || c == ';'){
|
||||
tok.length = cmp->pos - tok.begin;
|
||||
List_Token_push(&cmp->tokens, tok);
|
||||
return;
|
||||
}
|
||||
|
||||
cmp->column++;
|
||||
cmp->pos++;
|
||||
}
|
||||
|
||||
// end of file
|
||||
tok.length = cmp->pos - tok.begin;
|
||||
List_Token_push(&cmp->tokens, tok);
|
||||
}
|
||||
|
||||
static bool lex(Lexer* cmp){
|
||||
returnErrorIf_auto(cmp->state != LexerState_Initial);
|
||||
cmp->state = LexerState_Lexing;
|
||||
cmp->column = 1;
|
||||
|
||||
while(cmp->pos < cmp->code_len){
|
||||
char c = cmp->code[cmp->pos];
|
||||
switch(c){
|
||||
// skip blank characters
|
||||
case ' ': case '\t': case '\r': case '\n':
|
||||
break;
|
||||
// try read comment
|
||||
case '/':
|
||||
readComment(cmp);
|
||||
break;
|
||||
// try read label
|
||||
case '.':
|
||||
readLabel(cmp);
|
||||
break;
|
||||
case '"':
|
||||
readString(cmp);
|
||||
break;
|
||||
case '\'':
|
||||
readChar(cmp);
|
||||
break;
|
||||
default:
|
||||
// try read instruction
|
||||
if(isAlphabeticalLower(c) || isAlphabeticalUpper(c))
|
||||
readInstruction(cmp);
|
||||
else if(isDigit(c))
|
||||
readNumber(cmp);
|
||||
else returnError(Error_unexpectedCharacter(c));
|
||||
break;
|
||||
}
|
||||
|
||||
if(cmp->state == LexerState_Error)
|
||||
return false;
|
||||
|
||||
c = cmp->code[cmp->pos];
|
||||
if(c == '\n')
|
||||
completeLine(cmp);
|
||||
cmp->column++;
|
||||
cmp->pos++;
|
||||
}
|
||||
|
||||
completeLine(cmp);
|
||||
return true;
|
||||
}
|
||||
|
||||
static bool parse(Lexer* cmp){
|
||||
returnErrorIf_auto(cmp->state != LexerState_Lexing);
|
||||
cmp->state = LexerState_Parsing;
|
||||
|
||||
return true;
|
||||
}
|
||||
static bool compile(Lexer* cmp, FILE* f){
|
||||
returnErrorIf_auto(cmp->state != LexerState_Parsing);
|
||||
cmp->state = LexerState_Compiling;
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
bool Lexer_compileTasm(Lexer* cmp, cstr source_file_name, cstr out_file_name, bool debug){
|
||||
FILE* f = fopen(source_file_name, "rb");
|
||||
if(f == NULL)
|
||||
returnError("ERROR: can't open file '%s'", source_file_name);
|
||||
|
||||
List_u8 buf = List_u8_alloc(64 * 1024);
|
||||
int ret;
|
||||
while((ret = fgetc(f)) != EOF) {
|
||||
List_u8_push(&buf, ret);
|
||||
}
|
||||
if(ferror(f)){
|
||||
free(buf.data);
|
||||
fclose(f);
|
||||
returnError("can't read file '%s'", source_file_name);
|
||||
}
|
||||
fclose(f);
|
||||
|
||||
if(buf.len == 0){
|
||||
free(buf.data);
|
||||
fclose(f);
|
||||
returnError("soucre file is empty");
|
||||
}
|
||||
|
||||
cmp->code = (char*)buf.data;
|
||||
cmp->code_len = buf.len;
|
||||
List_u8_push(&buf, 0);
|
||||
|
||||
f = fopen(out_file_name, "wb");
|
||||
if(f == NULL){
|
||||
free(buf.data);
|
||||
returnError("ERROR: can't open file '%s'", out_file_name);
|
||||
}
|
||||
|
||||
if(debug){
|
||||
printf("----------------------------------[%s]---------------------------------\n", source_file_name);
|
||||
fputs(cmp->code, stdout);
|
||||
fputc('\n', stdout);
|
||||
}
|
||||
|
||||
bool success = lex(cmp);
|
||||
if(debug){
|
||||
printf("------------------------------------[lines]-----------------------------------\n");
|
||||
for(u32 i = 0; i < cmp->line_lengths.len; i++){
|
||||
printf("[%u] length: %u\n", i+1, cmp->line_lengths.data[i]);
|
||||
}
|
||||
printf("------------------------------------[tokens]-----------------------------------\n");
|
||||
for(u32 i = 0; i < cmp->tokens.len; i++){
|
||||
Token t = cmp->tokens.data[i];
|
||||
CodePos pos = Lexer_getLineAndColumn(cmp, t.begin);
|
||||
char* tokstr = malloc(4096);
|
||||
strncpy(tokstr, cmp->code + t.begin, t.length);
|
||||
tokstr[t.length] = 0;
|
||||
printf("[l:%3u, c:%3u] %s '%s'\n",
|
||||
pos.line, pos.column,
|
||||
TokenType_toString(t.type), tokstr);
|
||||
free(tokstr);
|
||||
}
|
||||
}
|
||||
if(!success){
|
||||
fclose(f);
|
||||
return false;
|
||||
}
|
||||
|
||||
success = parse(cmp);
|
||||
if(!success){
|
||||
fclose(f);
|
||||
return false;
|
||||
}
|
||||
|
||||
success = compile(cmp, f);
|
||||
fclose(f);
|
||||
if(success){
|
||||
cmp->state = LexerState_Success;
|
||||
}
|
||||
|
||||
return success;
|
||||
}
|
||||
@ -1,48 +1,34 @@
|
||||
#pragma once
|
||||
#include "../std.h"
|
||||
#include "../collections/List.h"
|
||||
#include "token.h"
|
||||
#include "Token.h"
|
||||
#include "AST.h"
|
||||
|
||||
List_declare(Token);
|
||||
typedef enum CompilerState {
|
||||
CompilerState_Initial,
|
||||
CompilerState_Lexing,
|
||||
CompilerState_Parsing,
|
||||
CompilerState_Compiling,
|
||||
CompilerState_Error,
|
||||
CompilerState_Success
|
||||
} CompilerState;
|
||||
|
||||
typedef enum LexerState {
|
||||
LexerState_Initial,
|
||||
LexerState_Lexing,
|
||||
LexerState_Parsing,
|
||||
LexerState_Compiling,
|
||||
LexerState_Error,
|
||||
LexerState_Success
|
||||
} LexerState;
|
||||
|
||||
typedef struct Lexer {
|
||||
typedef struct Compiler {
|
||||
char* code;
|
||||
u32 code_len;
|
||||
u32 column; // > 0 if code parsing started
|
||||
u32 pos;
|
||||
LexerState state;
|
||||
CompilerState state;
|
||||
NULLABLE(char* error_message);
|
||||
List_Token tokens;
|
||||
List_u32 line_lengths;
|
||||
} Lexer;
|
||||
AST ast;
|
||||
u32 tok_i;
|
||||
} Compiler;
|
||||
|
||||
void Lexer_init(Lexer* cmp);
|
||||
void Lexer_free(Lexer* cmp);
|
||||
void Compiler_init(Compiler* cmp);
|
||||
void Compiler_free(Compiler* cmp);
|
||||
|
||||
/// @brief compile assembly language code to machine code
|
||||
/// @return true if no errors, false if any error occurred (check cmp->error_message)
|
||||
bool Lexer_compileTasm(Lexer* cmp, cstr source_file_name, cstr out_file_name, bool debug);
|
||||
|
||||
#define Lexer_setError(cmp, format, ...) _Lexer_setError(cmp, __func__, format ,##__VA_ARGS__)
|
||||
void _Lexer_setError(Lexer* cmp, cstr context, cstr format, ...) __attribute__((__format__(__printf__, 3, 4)));
|
||||
|
||||
typedef struct CodePos {
|
||||
u32 line; // 0 on error
|
||||
u32 column; // 0 on error
|
||||
} CodePos;
|
||||
|
||||
#define CodePos_create(L, C) ((CodePos){ .line = L, .column = C })
|
||||
|
||||
/// @param pos index in code buffer
|
||||
CodePos Lexer_getLineAndColumn(Lexer* cmp, u32 pos);
|
||||
|
||||
bool Compiler_compile(Compiler* cmp, cstr source_file_name, cstr out_file_name, bool debug);
|
||||
|
||||
@ -0,0 +1,29 @@
|
||||
#include "Compiler.h"
|
||||
|
||||
void _Compiler_setError(Compiler* cmp, cstr context, cstr format, ...) __attribute__((__format__(__printf__, 3, 4)));
|
||||
|
||||
#define Compiler_setError(cmp, format, ...) _Compiler_setError(cmp, __func__, format ,##__VA_ARGS__)
|
||||
|
||||
/// Reports an error through setError() and makes the enclosing function return false.
/// setError() is not defined in this header: the .c file using these macros has to provide it.
#define returnError(FORMAT, ...) {\
    setError(FORMAT, ##__VA_ARGS__);\
    return false;\
}

/// Same as returnError(), but only when STATEMENT evaluates to true.
/// The hidden `if` is wrapped in its own braces so a following `else` can never bind to it.
#define returnErrorIf(STATEMENT, FORMAT, ...) {\
    if(STATEMENT) returnError(FORMAT, ##__VA_ARGS__)\
}

/// Same as returnErrorIf(), using the source text of STATEMENT as the error message.
/// The text is passed as a "%s" argument, so a '%' inside the condition
/// (e.g. the modulo operator) is never interpreted as a format specifier.
#define returnErrorIf_auto(STATEMENT) returnErrorIf(STATEMENT, "%s", #STATEMENT)
|
||||
|
||||
typedef struct CodePos {
|
||||
u32 line; // 0 on error
|
||||
u32 column; // 0 on error
|
||||
} CodePos;
|
||||
|
||||
#define CodePos_create(L, C) ((CodePos){ .line = L, .column = C })
|
||||
|
||||
/// @param pos index in code buffer
|
||||
CodePos Compiler_getLineAndColumn(Compiler* cmp, u32 pos);
|
||||
|
||||
char* Compiler_extractTokenStr(Compiler* cmp, Token t);
|
||||
|
||||
bool Compiler_lex(Compiler* cmp);
|
||||
bool Compiler_parse(Compiler* cmp);
|
||||
@ -1,3 +1,68 @@
|
||||
#include "compiler.h"
|
||||
#include "Compiler_internal.h"
|
||||
|
||||
List_define(DataDefinition);
|
||||
/// Records an error on `cmp` (a Compiler* that must be in scope at the call site).
/// Before the message is stored, cmp->pos is set to the beginning of the current token.
/// When tok_i does not point at a valid token (empty token list, or all tokens consumed)
/// cmp->pos is left untouched instead of reading outside of cmp->tokens.data.
#define setError(FORMAT, ...) {\
    if(cmp->tok_i < cmp->tokens.len)\
        cmp->pos = cmp->tokens.data[cmp->tok_i].begin;\
    Compiler_setError(cmp, FORMAT, ##__VA_ARGS__);\
}
|
||||
|
||||
/// Records an "unexpected token" error that quotes the source text of token T.
/// The text returned by Compiler_extractTokenStr() is released here after the error is stored.
#define setError_unexpectedToken(T) {\
    char* tok_str = Compiler_extractTokenStr(cmp, T);\
    setError("unexpected token '%s'", tok_str);\
    free(tok_str);\
}
|
||||
|
||||
|
||||
static void parseDataDefinition(Compiler* cmp, char* instr_name, DataDefinition* dataDefPtr){
|
||||
|
||||
}
|
||||
|
||||
|
||||
static void parseOperation(Compiler* cmp, char* instr_name, Operation* operPtr){
|
||||
|
||||
}
|
||||
|
||||
/// @brief Walks cmp->tokens starting at cmp->tok_i and fills cmp->ast:
///     every label token opens a new section, every instruction token is added
///     to the current section either as a data definition (name starts with "const")
///     or as an operation. Comment tokens are ignored.
/// @return true when all tokens were consumed, false on error
///     (wrong compiler state, token of unset type, instruction before the first label,
///     or any other token type at statement level)
bool Compiler_parse(Compiler* cmp){
    returnErrorIf_auto(cmp->state != CompilerState_Lexing);
    cmp->state = CompilerState_Parsing;
    // section that receives the parsed instructions; NULL until the first label is met
    Section* sec = NULL;

    while(cmp->tok_i < cmp->tokens.len){
        Token tok = cmp->tokens.data[cmp->tok_i];
        switch(tok.type){
            case TokenType_Unset:
                returnError("token of undefined type");
            case TokenType_SingleLineComment:
            case TokenType_MultiLineComment:
                // skip comments
                break;
            case TokenType_Label:
                // create new section
                sec = List_Section_expand(&cmp->ast.sections);
                Section_init(sec, Compiler_extractTokenStr(cmp, tok));
                break;
            // braces are required: a declaration can not directly follow a case label
            case TokenType_Instruction: {
                // without a section there is no list to put the instruction into
                returnErrorIf(sec == NULL, "instruction must be placed inside a section (after a label)");
                // NOTE(review): the string is heap-allocated (setError_unexpectedToken frees
                // the same kind of result) and is not freed here; presumably the parse*
                // helpers are meant to take ownership - confirm once they are implemented.
                char* instr_name = Compiler_extractTokenStr(cmp, tok);
                // data definition starts with const
                if(strncmp(instr_name, "const", 5) == 0){
                    DataDefinition* dataDefPtr = List_DataDefinition_expand(&sec->data);
                    parseDataDefinition(cmp, instr_name, dataDefPtr);
                }
                else {
                    Operation* operPtr = List_Operation_expand(&sec->code);
                    parseOperation(cmp, instr_name, operPtr);
                }
                break;
            }
            default:
                setError_unexpectedToken(tok);
                return false;
        }

        // the parse* helpers report failures through the compiler state
        if(cmp->state == CompilerState_Error)
            return false;

        cmp->tok_i++;
    }

    return true;
}
|
||||
|
||||
@ -1,19 +1,19 @@
|
||||
#include "token.h"
|
||||
#include "Token.h"
|
||||
|
||||
List_define(Token);
|
||||
|
||||
static cstr _TokenType_str[] = {
|
||||
"Unset",
|
||||
"SingleLineComment",
|
||||
"MultiLineComment",
|
||||
"Instruction",
|
||||
"Label",
|
||||
"DataDefinition",
|
||||
"Number",
|
||||
"Char",
|
||||
"String",
|
||||
"Instruction",
|
||||
"Register",
|
||||
"DataType",
|
||||
"DataPointer",
|
||||
"DataSize"
|
||||
"Name",
|
||||
"NamedDataPointer",
|
||||
"NamedDataSize"
|
||||
};
|
||||
|
||||
cstr TokenType_toString(TokenType t){
|
||||
|
||||
@ -1,20 +1,19 @@
|
||||
#pragma once
|
||||
#include "../std.h"
|
||||
#include "../collections/List.h"
|
||||
|
||||
typedef enum TokenType {
|
||||
TokenType_Unset,
|
||||
TokenType_SingleLineComment,
|
||||
TokenType_MultiLineComment,
|
||||
TokenType_Label,
|
||||
TOkenType_DataDefinition,
|
||||
TokenType_Number,
|
||||
TokenType_Char,
|
||||
TokenType_String,
|
||||
TokenType_Instruction,
|
||||
TokenType_Register,
|
||||
TokenType_DataType,
|
||||
TokenType_DataPointer,
|
||||
TokenType_DataSize
|
||||
TokenType_Unset, // initial value
|
||||
TokenType_SingleLineComment, // //comment
|
||||
TokenType_MultiLineComment, // /* comment */
|
||||
TokenType_Instruction, // abc
|
||||
TokenType_Label, // .abc:
|
||||
TokenType_Number, // 0123
|
||||
TokenType_Char, // 'A'
|
||||
TokenType_String, // "aaaa"
|
||||
TokenType_Name, // xyz
|
||||
TokenType_NamedDataPointer, // @xyz
|
||||
TokenType_NamedDataSize // #xyz
|
||||
} TokenType;
|
||||
|
||||
cstr TokenType_toString(TokenType t);
|
||||
@ -25,4 +24,6 @@ typedef struct Token {
|
||||
TokenType type : 8; // type of token (8 bits)
|
||||
} Token;
|
||||
|
||||
List_declare(Token);
|
||||
|
||||
#define Token_construct(TYPE, BEGIN, END) ((Token){ .type = TYPE, .begin = BEGIN, .length = END })
|
||||
|
||||
40
src/cstr.c
40
src/cstr.c
@ -50,3 +50,43 @@ char* NULLABLE(vsprintf_malloc)(size_t buffer_size, cstr format, va_list argv){
|
||||
}
|
||||
return buf;
|
||||
}
|
||||
|
||||
/// @brief Searches <src> for the first occurrence of <fragment>.
///     Only the window of at most <seekLength> characters that begins at <startIndex>
///     is examined (the window is clamped to the end of <src>), and a match has to lie
///     entirely inside of that window.
/// @param startIndex index in <src> where the search begins
/// @param seekLength max number of characters to examine, (u32)-1 to search until the end
/// @return index of the first character of the match, or -1 if nothing was found,
///     if <src> or <fragment> is NULL or empty, or if <startIndex> is past the end of <src>
i32 cstr_seek(const char* src, const char* fragment, u32 startIndex, u32 seekLength){
    if(src == NULL || fragment == NULL)
        return -1;
    size_t src_len = strlen(src);
    size_t fr_len = strlen(fragment);
    if(src_len == 0 || fr_len == 0 || startIndex >= src_len)
        return -1;

    // number of characters that are really available for the search
    size_t window = src_len - startIndex;
    if(seekLength < window)
        window = seekLength;
    if(fr_len > window)
        return -1;

    // the last position where the fragment still fits into the window
    size_t last_start = startIndex + (window - fr_len);
    for(size_t pos = startIndex; pos <= last_start; pos++){
        // every candidate position is compared from the beginning of the fragment
        if(memcmp(src + pos, fragment, fr_len) == 0)
            return (i32)pos;
    }
    return -1;
}
|
||||
|
||||
/// @brief Searches <src> backwards for the last occurrence of <fragment>.
///     Only the window of at most <seekLength> characters that ends at <startIndex>
///     (inclusive) is examined, and a match has to lie entirely inside of that window.
/// @param startIndex index of the last character of the window;
///     (u32)-1 or any value past the end of <src> means the last character of <src>
/// @param seekLength max number of characters to examine, (u32)-1 to search until the beginning
/// @return index of the first character of the match, or -1 if nothing was found
///     or if <src> or <fragment> is NULL or empty
i32 cstr_seekReverse(const char* src, const char* fragment, u32 startIndex, u32 seekLength){
    if(src == NULL || fragment == NULL)
        return -1;
    size_t src_len = strlen(src);
    size_t fr_len = strlen(fragment);
    if(src_len == 0 || fr_len == 0)
        return -1;

    // index of the last character that may belong to a match
    size_t last = startIndex >= src_len ? src_len - 1 : startIndex;
    // number of characters that are really available for the search
    size_t window = last + 1;
    if(seekLength < window)
        window = seekLength;
    if(fr_len > window)
        return -1;

    // lowest index of the window
    size_t first = last + 1 - window;
    // start from the highest position where the fragment still fits and move towards <first>
    size_t pos = last + 1 - fr_len;
    for(;;){
        if(memcmp(src + pos, fragment, fr_len) == 0)
            return (i32)pos;
        // checked before decrementing, so the unsigned index never wraps around
        if(pos == first)
            break;
        pos--;
    }
    return -1;
}
|
||||
@ -29,7 +29,7 @@ const Instruction instructions[] = {
|
||||
// Instruction_construct(CALL),
|
||||
};
|
||||
|
||||
const Instruction* Instruction_getFromOpcode(Opcode opcode){
|
||||
const Instruction* Instruction_getByOpcode(Opcode opcode){
|
||||
if(opcode >= ARRAY_SIZE(instructions))
|
||||
return NULL;
|
||||
|
||||
|
||||
@ -33,4 +33,4 @@ typedef struct Instruction {
|
||||
/// @brief get instruction info from table
|
||||
/// @param opcode any byte
|
||||
/// @return ptr to struct or NULL
|
||||
const Instruction* NULLABLE(Instruction_getFromOpcode)(Opcode opcode);
|
||||
const Instruction* NULLABLE(Instruction_getByOpcode)(Opcode opcode);
|
||||
|
||||
23
src/main.c
23
src/main.c
@ -1,11 +1,11 @@
|
||||
#include "VM/VM.h"
|
||||
#include "instructions/instructions.h"
|
||||
#include "collections/List.h"
|
||||
#include "compiler/compiler.h"
|
||||
#include "compiler/Compiler.h"
|
||||
|
||||
#define arg_is(STR) (strcmp(argv[argi], STR) == 0)
|
||||
|
||||
i32 compileSources(cstr source_file, cstr out_file);
|
||||
i32 compileSources(cstr source_file, cstr out_file, bool debug_log);
|
||||
i32 bootFromImage(cstr image_file);
|
||||
|
||||
i32 main(const i32 argc, cstr* argv){
|
||||
@ -21,6 +21,8 @@ i32 main(const i32 argc, cstr* argv){
|
||||
cstr NULLABLE(out_file) = NULL;
|
||||
cstr NULLABLE(source_file) = NULL;
|
||||
|
||||
bool debug_log = false;
|
||||
|
||||
for(i32 argi = 1; argi < argc; argi++){
|
||||
if(arg_is("-h") || arg_is("--help")){
|
||||
printf(
|
||||
@ -28,12 +30,13 @@ i32 main(const i32 argc, cstr* argv){
|
||||
"-op, --opcodes Show list of all instructions.\n"
|
||||
"-i, --image [FILE] Boot VM using image file.\n"
|
||||
"-c, --compile [SOURCE_FILE] [OUT_FILE] Compile assembly source files to machine code.\n"
|
||||
"-d, --debug Enable debug log.\n"
|
||||
);
|
||||
return 0;
|
||||
}
|
||||
else if(arg_is("-op") || arg_is("--opcodes")){
|
||||
for(u8 opcode = 0; opcode < 255; opcode++){
|
||||
const Instruction* instr = Instruction_getFromOpcode(opcode);
|
||||
const Instruction* instr = Instruction_getByOpcode(opcode);
|
||||
if(instr != NULL){
|
||||
printf("%02X %s\n", opcode, instr->name);
|
||||
}
|
||||
@ -72,6 +75,9 @@ i32 main(const i32 argc, cstr* argv){
|
||||
}
|
||||
out_file = argv[argi];
|
||||
}
|
||||
else if(arg_is("-d") || arg_is("--debug")){
|
||||
debug_log = true;
|
||||
}
|
||||
else {
|
||||
printfe("ERROR: unknown argument '%s'\n", argv[argi]);
|
||||
return 1;
|
||||
@ -80,7 +86,7 @@ i32 main(const i32 argc, cstr* argv){
|
||||
|
||||
i32 exit_code = 0;
|
||||
if(compile){
|
||||
exit_code = compileSources(source_file, out_file);
|
||||
exit_code = compileSources(source_file, out_file, debug_log);
|
||||
}
|
||||
if(exit_code == 0 && boot){
|
||||
exit_code = bootFromImage(image_file);
|
||||
@ -131,19 +137,20 @@ i32 bootFromImage(cstr image_file){
|
||||
return exit_code;
|
||||
}
|
||||
|
||||
i32 compileSources(cstr source_file, cstr out_file){
|
||||
i32 compileSources(cstr source_file, cstr out_file, bool debug_log){
|
||||
Compiler cmp;
|
||||
Compiler_init(&cmp);
|
||||
bool success = Compiler_compileTasm(&cmp, source_file, out_file, true);
|
||||
Compiler_free(&cmp);
|
||||
bool success = Compiler_compile(&cmp, source_file, out_file, debug_log);
|
||||
if(!success){
|
||||
if(cmp.error_message){
|
||||
printfe("COMPILER ERROR: %s\n", cmp.error_message);
|
||||
free(cmp.error_message);
|
||||
}
|
||||
else printfe("COMPILER ERROR: unknown (error_message is null)\n");
|
||||
Compiler_free(&cmp);
|
||||
return 111;
|
||||
}
|
||||
|
||||
|
||||
Compiler_free(&cmp);
|
||||
return 0;
|
||||
}
|
||||
|
||||
10
src/std.h
10
src/std.h
@ -57,3 +57,13 @@ char* NULLABLE(vsprintf_malloc)(size_t buffer_size, cstr format, va_list argv);
|
||||
static inline bool isAlphabeticalLower(char c) { return 'a' <= c && c <= 'z'; }
|
||||
static inline bool isAlphabeticalUpper(char c) { return 'A' <= c && c <= 'Z'; }
|
||||
static inline bool isDigit(char c) { return '0' <= c && c <= '9'; }
|
||||
|
||||
/// @param startIndex 0 ... src length
|
||||
/// @param seekLength 0 ... -1
|
||||
/// @return pos of first <fragment> inclusion in <src> or -1 if not found
|
||||
i32 cstr_seek(const char* src, const char* fragment, u32 startIndex, u32 seekLength);
|
||||
|
||||
/// @param startIndex -1 ... src length
|
||||
/// @param seekLength 0 ... -1
|
||||
/// @return pos of last <fragment> inclusion in <src> (the first one met when searching backwards) or -1 if not found
|
||||
i32 cstr_seekReverse(const char* src, const char* fragment, u32 startIndex, u32 seekLength);
|
||||
|
||||
Loading…
Reference in New Issue
Block a user