/*
 * TCPU/src/compiler/Compiler.c
 * (file-viewer metadata: 392 lines, 14 KiB, C)
 */

#include "Compiler_internal.h"
// Initializes a Compiler in place.
// The whole struct is zeroed first, then the state is set to Initial,
// the token and line-length lists are allocated and the embedded
// AST and BinaryObject are constructed.
void Compiler_construct(Compiler* cmp){
    // initial capacities passed to List_alloc
    enum {
        initial_token_capacity = 4096,
        initial_line_capacity = 1024
    };

    memset(cmp, 0, sizeof(*cmp));
    cmp->state = CompilerState_Initial;
    cmp->tokens = List_alloc(Token, initial_token_capacity);
    cmp->line_lengths = List_alloc(u32, initial_line_capacity);
    AST_construct(&cmp->ast);
    BinaryObject_construct(&cmp->binary);
}
// Releases everything the Compiler owns: the source code buffer,
// the token list, the line-length list, the AST and the binary object.
// The Compiler struct itself is not freed.
// NOTE(review): cmp->error_message is not released here -- confirm who owns it.
void Compiler_destroy(Compiler* cmp){
    // plain heap buffers
    free(cmp->line_lengths.data);
    free(cmp->tokens.data);
    free(cmp->code.data);

    // nested objects with their own destructors
    AST_destroy(&cmp->ast);
    BinaryObject_destroy(&cmp->binary);
}
// Converts an absolute position in the source code into a 1-based
// line and column pair using the recorded line lengths.
// Returns CodePos (0, 0) when pos is outside of the code or is not
// covered by any recorded line.
CodePos Compiler_getLineAndColumn(Compiler* cmp, u32 pos){
    if(pos >= cmp->code.size){
        return CodePos_create(0, 0);
    }

    const u32* line_lens = (const u32*)cmp->line_lengths.data;
    u32 line_count = List_len(&cmp->line_lengths, u32);

    // position of the first character of the current line
    u32 line_begin = 0;
    for(u32 line_i = 0; line_i < line_count; line_i++){
        u32 line_end = line_begin + line_lens[line_i];
        if(pos < line_end){
            return CodePos_create(line_i + 1, pos - line_begin + 1);
        }
        line_begin = line_end;
    }

    return CodePos_create(0, 0);
}
// Puts the compiler into the Error state and stores a formatted message.
// The message looks like "[at LINE:COLUMN][CONTEXT] TEXT", where LINE and
// COLUMN are computed from cmp->pos and TEXT is produced from the
// printf-style `format` and the variadic arguments.
//   cmp     - compiler whose state, pos and error_message are updated
//   context - text placed in the second pair of brackets
//   format  - printf-style format of the message body
// The message is heap-allocated; it may be NULL only if even the fallback
// allocation fails.
void _Compiler_setError(Compiler* cmp, cstr context, cstr format, ...){
    // happens at the end of file
    if(cmp->pos >= cmp->code.size){
        // code.size is 0 when no source has been loaded yet;
        // `size - 1` would wrap around to the maximum unsigned value
        cmp->pos = cmp->code.size != 0 ? cmp->code.size - 1 : 0;
    }

    char position_str[32];
    CodePos code_pos = Compiler_getLineAndColumn(cmp, cmp->pos);
    snprintf(position_str, sizeof(position_str), "[at %u:%u][", code_pos.line, code_pos.column);
    char* real_format = strcat_malloc(position_str, context, "] ", format);

    va_list argv;
    va_start(argv, format);
    char* NULLABLE(buf) = vsprintf_malloc(512, real_format, argv);
    va_end(argv);
    free(real_format);

    if(buf == NULL){
        // formatting failed, store a fixed message instead
        static const char fallback_message[] = "SPRINTF FAILED";
        buf = malloc(sizeof(fallback_message));
        if(buf != NULL){
            strcpy(buf, fallback_message);
        }
    }

    cmp->state = CompilerState_Error;
    cmp->error_message = buf;
}
// Shorthand for reporting an error from a function that has a local
// variable or parameter named `cmp`: it is passed to Compiler_setError
// implicitly together with the format and its arguments.
// Compiler_setError is not defined in this part of the file
// (presumably it comes from Compiler_internal.h -- TODO confirm).
// The expansion is a braced block, not a single expression.
// `##__VA_ARGS__` is the GNU comma-swallowing extension, so the macro
// can be used with a format string alone.
#define setError(FORMAT, ...) {\
Compiler_setError(cmp, FORMAT, ##__VA_ARGS__);\
}
// Builds a str that covers the text of token `t` inside the source code:
// it starts at code.data + t.begin and is t.length long.
// The third str_construct argument is passed as false.
str Compiler_constructTokenStr(Compiler* cmp, Token t){
    char* token_begin = (char*)(cmp->code.data + t.begin);
    return str_construct(token_begin, t.length, false);
}
// Compiles one parsed section into a new CompiledSection appended to
// cmp->binary.comp_sec_list and registers its index under the section name.
// The section bytes consist of the encoded operations followed by the bytes
// of all data definitions. References to named data are emitted as 8 zero
// bytes and recorded in named_refs together with their offset.
// Returns false (through returnError) on a duplicate section name or on an
// unsupported, unset or unknown argument type.
static bool compileSection(Compiler* cmp, Section* sec){
    // index the new section is going to have, taken before the list grows
    u32 cs_index = List_len(&cmp->binary.comp_sec_list, CompiledSection);
    CompiledSection* cs = List_expand_size(&cmp->binary.comp_sec_list, sizeof(CompiledSection));
    CompiledSection_construct(cs, sec->name);
    if(!HashMap_tryPush(&cmp->binary.comp_sec_i_map, cs->name, &cs_index)){
        returnError("duplicate section '%s'", str_copy(sec->name).data);
    }

    // compile code
    // placeholder written where a named reference value belongs
    u8 placeholder[8] = {0};
    Operation* ops = (Operation*)sec->operations_list.data;
    u32 op_count = List_len(&sec->operations_list, Operation);
    for(u32 op_i = 0; op_i < op_count; op_i++){
        Operation* op = &ops[op_i];
        List_pushMany(&cs->bytes, u8, &op->opcode, sizeof(op->opcode));

        Argument* args = (Argument*)op->args.data;
        u32 arg_count = List_len(&op->args, Argument);
        for(u32 arg_i = 0; arg_i < arg_count; arg_i++){
            Argument* arg = &args[arg_i];
            switch(arg->type){
                case ArgumentType_Register:
                    List_push(&cs->bytes, u8, arg->value.register_code);
                    break;
                case ArgumentType_ConstValue:
                    List_pushMany(&cs->bytes, u8, &arg->value.i, 8);
                    break;
                case ArgumentType_ConstDataPointer:
                    // remember where the pointer value has to be inserted
                    List_push(&cs->named_refs, NamedRef, NamedRef_construct(
                        arg->value.data_name,
                        NamedRefType_Ptr,
                        cs->bytes.size));
                    List_pushMany(&cs->bytes, u8, placeholder, 8);
                    break;
                case ArgumentType_ConstDataSize:
                    // remember where the size value has to be inserted
                    List_push(&cs->named_refs, NamedRef, NamedRef_construct(
                        arg->value.data_name,
                        NamedRefType_Size,
                        cs->bytes.size));
                    List_pushMany(&cs->bytes, u8, placeholder, 8);
                    break;
                case ArgumentType_VarDataName:
                    returnError("argument type 'VarDataName' is not supported yet");
                case ArgumentType_Unset:
                    returnError("ArgumentType is not set");
                default:
                    returnError("invalid ArgumentType %i", arg->type);
            }
        }
    }

    // compile data
    DataDefinition* data_defs = (DataDefinition*)sec->data_definitions_list.data;
    u32 data_def_count = List_len(&sec->data_definitions_list, DataDefinition);
    for(u32 dd_i = 0; dd_i < data_def_count; dd_i++){
        DataDefinition* dd = &data_defs[dd_i];
        // the data begins at the current end of the section bytes
        List_push(&cs->const_data_props_list, ConstDataProps,
            ConstDataProps_construct(dd->name, dd->data_bytes.size, cs->bytes.size));
        List_pushMany(&cs->bytes, u8, dd->data_bytes.data, dd->data_bytes.size);
    }

    // TODO: push padding
    return true;
}
// Compiles all parsed sections and lays them out as one binary.
// Steps:
//   1. compile every section of the AST with compileSection
//   2. look up the section named "main"
//   3. chain the sections through `next` starting at main, assign an offset
//      to every non-main section (previous offset + previous size) and
//      register every section and every named data under its name
//   4. replace the placeholders of named references with the size or the
//      offset of the data they refer to
// Requires cmp->state to be Parsing and switches it to Compiling.
// Returns false (through the returnError macros) on a wrong state, a failed
// section, a missing 'main' section, a duplicate name or an unknown reference.
static bool compileBinary(Compiler* cmp){
    returnErrorIf_auto(cmp->state != CompilerState_Parsing);
    cmp->state = CompilerState_Compiling;

    for(u32 i = 0; i < List_len(&cmp->ast.sections, Section); i++){
        Section* sec = (Section*)cmp->ast.sections.data + i;
        if(!compileSection(cmp, sec)){
            return false;
        }
    }

    // find main section
    str main_sec_name = STR("main");
    u32* main_sec_i_ptr = HashMap_tryGetPtr(&cmp->binary.comp_sec_i_map, main_sec_name);
    if(main_sec_i_ptr == NULL){
        returnError("no 'main' section was defined");
    }
    u32 main_sec_i = *main_sec_i_ptr;
    cmp->binary.main_sec = (CompiledSection*)cmp->binary.comp_sec_list.data + main_sec_i;

    // create linked list of CompiledSection where main is the first
    CompiledSection* prev_sec = cmp->binary.main_sec;
    u32 total_size = 0;
    for(u32 i = 0; i < List_len(&cmp->binary.comp_sec_list, CompiledSection); i++){
        CompiledSection* sec = (CompiledSection*)cmp->binary.comp_sec_list.data + i;
        total_size += sec->bytes.size;
        bool is_main_sec = str_equals(sec->name, main_sec_name);
        // the offset of main is not assigned here, it keeps the value it already has
        if(!is_main_sec){
            sec->offset = prev_sec->offset + prev_sec->bytes.size;
        }

        // the section itself can be referenced by name like named data
        ConstDataProps cd = ConstDataProps_construct(sec->name, sec->bytes.size, sec->offset);
        if(!HashMap_tryPush(&cmp->binary.const_data_props_map, cd.name, &cd)){
            returnError("duplicate named data '%s'", str_copy(cd.name).data);
        }
        for(u32 j = 0; j < List_len(&sec->const_data_props_list, ConstDataProps); j++){
            cd = ((ConstDataProps*)sec->const_data_props_list.data)[j];
            // make the section-relative offset relative to the whole binary
            cd.offset += sec->offset;
            if(!HashMap_tryPush(&cmp->binary.const_data_props_map, cd.name, &cd)){
                returnError("duplicate named data '%s'", str_copy(cd.name).data);
            }
        }

        if(is_main_sec)
            continue;
        prev_sec->next = sec;
        prev_sec = sec;
    }

    // insert calculated offsets into sections
    for(u32 i = 0; i < List_len(&cmp->binary.comp_sec_list, CompiledSection); i++){
        CompiledSection* sec = (CompiledSection*)cmp->binary.comp_sec_list.data + i;
        for(u32 j = 0; j < List_len(&sec->named_refs, NamedRef); j++){
            NamedRef* ref = (NamedRef*)sec->named_refs.data + j;
            ConstDataProps* target_data = HashMap_tryGetPtr(
                &cmp->binary.const_data_props_map, ref->name);
            if(target_data == NULL){
                returnError("can't find named data '%s'", str_copy(ref->name).data);
            }

            u64 ref_value = 0;
            switch(ref->type){
                default:
                    returnError("invalid NamedRefType %i", ref->type);
                case NamedRefType_Size:
                    ref_value = target_data->size;
                    break;
                case NamedRefType_Ptr:
                    ref_value = target_data->offset;
                    break;
            }
            // ref->offset is an arbitrary position in the byte stream and is not
            // guaranteed to be aligned for u64, so the value is copied byte-wise
            // instead of being stored through a casted u64 pointer
            memcpy((u8*)sec->bytes.data + ref->offset, &ref_value, sizeof(ref_value));
        }
    }

    cmp->binary.total_size = total_size;
    return true;
}
// Writes the bytes of every linked section to `f`, starting at the main
// section and following the `next` pointers.
// Requires cmp->state to be Compiling.
// Returns false (through the returnError macros) on a wrong state or when
// the data can not be completely written or flushed.
static bool writeBinaryFile(Compiler* cmp, FILE* f){
    returnErrorIf_auto(cmp->state != CompilerState_Compiling);

    for(CompiledSection* sec = cmp->binary.main_sec; sec != NULL; sec = sec->next){
        // fwrite returns the number of written items, which is less than
        // requested when an error occurs
        size_t written = fwrite(sec->bytes.data, 1, sec->bytes.size, f);
        if(written != sec->bytes.size){
            returnError("can't write compiled section to the output file");
        }
    }
    if(fflush(f) != 0){
        returnError("can't flush the output file");
    }

    //TODO: print warnings for unused sections
    return true;
}
// Compiles the source file `source_file_name` into the binary file
// `out_file_name`.
// Stages: read the source into cmp->code, open the output file, lex, parse,
// compile the binary and write it to the output file.
//   cmp              - constructed compiler that receives the code and results
//   source_file_name - path of the file to read
//   out_file_name    - path of the file to write; it is opened for writing
//                      before lexing starts
//   debug_log        - when true, the source text, line lengths, tokens, AST
//                      and compiled sections are printed to stdout
// Returns true and sets the state to Success when every stage succeeds.
// Returns false when any stage fails.
bool Compiler_compile(Compiler* cmp, cstr source_file_name, cstr out_file_name, bool debug_log){
    // read the whole source file
    FILE* f = fopen(source_file_name, "rb");
    if(f == NULL){
        returnError("ERROR: can't open file '%s'", source_file_name);
    }
    StringBuilder sb = StringBuilder_alloc(64 * 1024);
    int ret;
    while((ret = fgetc(f)) != EOF) {
        StringBuilder_append_char(&sb, ret);
    }
    // EOF is returned on both end of file and read error
    if(ferror(f)){
        StringBuilder_destroy(&sb);
        fclose(f);
        returnError("can't read file '%s'", source_file_name);
    }
    fclose(f);
    if(sb.buffer.size == 0){
        StringBuilder_destroy(&sb);
        returnError("source file is empty");
    }
    cmp->code = str_copy(StringBuilder_getStr(&sb));
    StringBuilder_destroy(&sb);

    // from here on `f` is the output file and must be closed on every return
    f = fopen(out_file_name, "wb");
    if(f == NULL){
        returnError("ERROR: can't open file '%s'", out_file_name);
    }

    if(debug_log){
        printf("===========================[%s]===========================\n", source_file_name);
        fputs(cmp->code.data, stdout);
        fputc('\n', stdout);
    }

    if(debug_log)
        printf("===================================[lexing]===================================\n");
    bool success = Compiler_lex(cmp);
    // the dump is printed even if lexing failed
    if(debug_log){
        printf("------------------------------------[lines]------------------------------------\n");
        for(u32 i = 0; i < List_len(&cmp->line_lengths, u32); i++){
            printf("[%u] length: %u\n", i+1, ((u32*)cmp->line_lengths.data)[i]);
        }
        printf("------------------------------------[tokens]-----------------------------------\n");
        for(u32 i = 0; i < List_len(&cmp->tokens, Token); i++){
            Token t = ((Token*)cmp->tokens.data)[i];
            CodePos pos = Compiler_getLineAndColumn(cmp, t.begin);
            // print the token text directly from the source with a bounded
            // precision instead of copying it into a fixed-size buffer,
            // which overflowed for tokens of 4096 characters or more
            const char* tok_begin = cmp->code.data + t.begin;
            u32 tok_len = t.length;
            // skip leading line breaks
            while(tok_len > 0 && (*tok_begin == '\r' || *tok_begin == '\n')){
                tok_begin++;
                tok_len--;
            }
            printf("[l:%3u, c:%3u] %s '%.*s'\n",
                pos.line, pos.column,
                TokenType_toString(t.type).data, (int)tok_len, tok_begin);
        }
    }
    if(!success){
        fclose(f);
        return false;
    }

    if(debug_log)
        printf("===================================[parsing]===================================\n");
    success = Compiler_parse(cmp);
    // the dump is printed even if parsing failed
    if (debug_log){
        printf("-------------------------------------[AST]-------------------------------------\n");
        for(u32 i = 0; i < List_len(&cmp->ast.sections, Section); i++){
            Section* sec = (Section*)cmp->ast.sections.data + i;
            str tmpstr = str_copy(sec->name);
            printf("section '%s'\n", tmpstr.data);
            free(tmpstr.data);

            for(u32 j = 0; j < List_len(&sec->data_definitions_list, DataDefinition); j++){
                DataDefinition* dd = (DataDefinition*)sec->data_definitions_list.data + j;
                tmpstr = str_copy(dd->name);
                // NOTE(review): divides by dd->element_size -- confirm the parser never leaves it 0
                printf(" const%u %s (len %u)\n", dd->element_size * 8, tmpstr.data,
                    dd->data_bytes.size/dd->element_size);
                free(tmpstr.data);
            }

            for(u32 j = 0; j < List_len(&sec->operations_list, Operation); j++){
                Operation* op = (Operation*)sec->operations_list.data + j;
                const Instruction* instr = Instruction_getByOpcode(op->opcode);
                if(instr == NULL){
                    fclose(f);
                    returnError("unknown opcode: %i", op->opcode);
                }
                printf(" %s", instr->name.data);
                for(u32 k = 0; k < List_len(&op->args, Argument); k++){
                    Argument* arg = (Argument*)op->args.data + k;
                    printf(" %s(", ArgumentType_toString(arg->type).data);
                    switch(arg->type){
                        default:
                            fclose(f);
                            returnError("invalid argument type %i", arg->type);
                        case ArgumentType_Register: {
                            str register_name = RegisterCode_toString(arg->value.register_code);
                            printf("%s 0x%x", register_name.data, arg->value.register_code);
                            free(register_name.data);
                            break;
                        }
                        case ArgumentType_ConstValue:
                            printf(IFWIN("%lli", "%li"), arg->value.i);
                            break;
                        case ArgumentType_ConstDataPointer:
                            tmpstr = str_copy(arg->value.data_name);
                            printf("@%s", tmpstr.data);
                            free(tmpstr.data);
                            break;
                        case ArgumentType_ConstDataSize:
                            tmpstr = str_copy(arg->value.data_name);
                            printf("#%s", tmpstr.data);
                            free(tmpstr.data);
                            break;
                        case ArgumentType_VarDataName:
                            tmpstr = str_copy(arg->value.data_name);
                            printf("%s", tmpstr.data);
                            free(tmpstr.data);
                            break;
                    }
                    printf(")");
                }
                printf("\n");
            }
        }
    }
    if(!success){
        fclose(f);
        return false;
    }

    if(debug_log)
        printf("==================================[compiling]==================================\n");
    success = compileBinary(cmp);
    // the dump is printed even if compilation failed
    if(debug_log){
        for(u32 i = 0; i < List_len(&cmp->binary.comp_sec_list, CompiledSection); i++){
            CompiledSection* sec = (CompiledSection*)cmp->binary.comp_sec_list.data + i;
            str tmpstr = str_copy(sec->name);
            printf("compiled section '%s' to %u bytes with offset 0x%x\n", tmpstr.data, sec->bytes.size, sec->offset);
            free(tmpstr.data);
        }
    }
    if(!success){
        fclose(f);
        return false;
    }

    if(debug_log)
        printf("----------------------------[writing output to file]---------------------------\n");
    success = writeBinaryFile(cmp, f);
    fclose(f);
    if(success){
        cmp->state = CompilerState_Success;
    }
    return success;
}