#include "Compiler_internal.h"

// Zero-fills the Compiler, sets the initial state and allocates
// the token list, the line-length list, the AST and the binary object.
void Compiler_construct(Compiler* cmp){
    memset(cmp, 0, sizeof(Compiler));
    cmp->state = CompilerState_Initial;
    cmp->tokens = List_alloc(Token, 4096);
    cmp->line_lengths = List_alloc(u32, 1024);
    AST_construct(&cmp->ast);
    BinaryObject_construct(&cmp->binary);
}

// Releases the source code copy, the token and line-length lists,
// the AST and the binary object.
// NOTE(review): cmp->error_message (allocated in _Compiler_setError) is not freed here;
// confirm that the caller owns it.
void Compiler_destroy(Compiler* cmp){
    free(cmp->code.data);
    free(cmp->tokens.data);
    free(cmp->line_lengths.data);
    AST_destroy(&cmp->ast);
    BinaryObject_destroy(&cmp->binary);
}

// Converts an offset in the source code into a 1-based line and column
// using the recorded line lengths.
// Returns CodePos(0, 0) when pos is outside of the code
// or is not covered by the recorded lines.
CodePos Compiler_getLineAndColumn(Compiler* cmp, u32 pos){
    u32 prev_lines_len = 0;
    if(pos >= cmp->code.size)
        return CodePos_create(0, 0);

    for(u32 i = 0; i < List_len(&cmp->line_lengths, u32); i++){
        u32 line_len = ((u32*)cmp->line_lengths.data)[i];
        if(prev_lines_len + line_len > pos)
            return CodePos_create(i + 1, pos + 1 - prev_lines_len);
        prev_lines_len += line_len;
    }

    return CodePos_create(0, 0);
}

// Formats an error message as "[at LINE:COLUMN][CONTEXT] MESSAGE",
// stores it in cmp->error_message and switches cmp->state to CompilerState_Error.
// The message buffer is heap-allocated.
void _Compiler_setError(Compiler* cmp, cstr context, cstr format, ...){
    // happens at the end of file
    if(cmp->pos >= cmp->code.size){
        // code.size is 0 when an error is reported before the source is loaded,
        // so `size - 1` must not be computed unconditionally
        cmp->pos = cmp->code.size > 0 ? cmp->code.size - 1 : 0;
    }

    char position_str[32];
    CodePos code_pos = Compiler_getLineAndColumn(cmp, cmp->pos);
    snprintf(position_str, sizeof(position_str), "[at %u:%u][", code_pos.line, code_pos.column);
    char* real_format = strcat_malloc(position_str, context, "] ", format);

    va_list argv;
    va_start(argv, format);
    char* NULLABLE(buf) = vsprintf_malloc(512, real_format, argv);
    va_end(argv);
    free(real_format);

    if(buf == NULL){
        // fallback message, left NULL if even this small allocation fails
        buf = malloc(16);
        if(buf != NULL)
            strcpy(buf, "SPRINTF FAILED");
    }

    cmp->state = CompilerState_Error;
    cmp->error_message = buf;
}

#define setError(FORMAT, ...) {\
    Compiler_setError(cmp, FORMAT, ##__VA_ARGS__);\
}

// Creates a str that points into the source code buffer at the token's position.
// No bytes are copied here: the result aliases cmp->code.data.
str Compiler_constructTokenStr(Compiler* cmp, Token t){
    str s = str_construct((char*)(cmp->code.data + t.begin), t.length, false);
    return s;
}

// Compiles one AST section into a new CompiledSection appended to cmp->binary.comp_sec_list.
// Operations are emitted first (opcode followed by encoded arguments),
// then the bytes of every data definition.
// Returns false through returnError on a duplicate section name
// or an unsupported/invalid argument type.
static bool compileSection(Compiler* cmp, Section* sec){
    u32 cs_index = List_len(&cmp->binary.comp_sec_list, CompiledSection);
    CompiledSection* cs = List_expand_size(&cmp->binary.comp_sec_list, sizeof(CompiledSection));
    CompiledSection_construct(cs, sec->name);
    if(!HashMap_tryPush(&cmp->binary.comp_sec_i_map, cs->name, &cs_index)){
        // NOTE(review): the str_copy result is never freed on this path
        returnError("duplicate section '%s'", str_copy(sec->name).data);
    }

    // compile code
    u8 zeroes[8] = {0, 0, 0, 0, 0, 0, 0, 0};
    for(u32 i = 0; i < List_len(&sec->operations_list, Operation); i++){
        Operation* op = (Operation*)sec->operations_list.data + i;
        List_pushMany(&cs->bytes, u8, &op->opcode, sizeof(op->opcode));
        for(u32 j = 0; j < List_len(&op->args, Argument); j++){
            Argument* arg = (Argument*)op->args.data + j;
            switch(arg->type){
                case ArgumentType_VarDataName:
                    returnError("argument type 'VarDataName' is not supported yet");
                case ArgumentType_Unset:
                    returnError("ArgumentType is not set");
                default:
                    returnError("invalid ArgumentType %i", arg->type);
                case ArgumentType_Register:
                    List_push(&cs->bytes, u8, arg->value.register_code);
                    break;
                case ArgumentType_ConstValue:
                    List_pushMany(&cs->bytes, u8, &arg->value.i, 8);
                    break;
                case ArgumentType_ConstDataPointer:
                    // remember where the pointer must be written, reserve 8 zero bytes for it
                    List_push(&cs->named_refs, NamedRef, NamedRef_construct(
                        arg->value.data_name, NamedRefType_Ptr, cs->bytes.size));
                    List_pushMany(&cs->bytes, u8, zeroes, 8);
                    break;
                case ArgumentType_ConstDataSize:
                    // remember where the size must be written, reserve 8 zero bytes for it
                    List_push(&cs->named_refs, NamedRef, NamedRef_construct(
                        arg->value.data_name, NamedRefType_Size, cs->bytes.size));
                    List_pushMany(&cs->bytes, u8, zeroes, 8);
                    break;
            }
        }
    }

    // compile data
    for(u32 i = 0; i < List_len(&sec->data_definitions_list, DataDefinition); i++){
        DataDefinition* dd = (DataDefinition*)sec->data_definitions_list.data + i;
        List_push(&cs->const_data_props_list, ConstDataProps,
            ConstDataProps_construct(dd->name, dd->data_bytes.size, cs->bytes.size));
        List_pushMany(&cs->bytes, u8, dd->data_bytes.data, dd->data_bytes.size);
    }

    // TODO: push padding

    return true;
}

// Compiles every AST section, chains the compiled sections into a linked list
// starting from 'main', computes section offsets, registers sections and their
// named data in const_data_props_map and patches named references
// with the resolved sizes/offsets.
// Requires cmp->state == CompilerState_Parsing, switches it to CompilerState_Compiling.
static bool compileBinary(Compiler* cmp){
    returnErrorIf_auto(cmp->state != CompilerState_Parsing);
    cmp->state = CompilerState_Compiling;

    for(u32 i = 0; i < List_len(&cmp->ast.sections, Section); i++){
        Section* sec = (Section*)cmp->ast.sections.data + i;
        if(!compileSection(cmp, sec)){
            return false;
        }
    }

    // find main section
    str main_sec_name = STR("main");
    u32* main_sec_i_ptr = HashMap_tryGetPtr(&cmp->binary.comp_sec_i_map, main_sec_name);
    if(main_sec_i_ptr == NULL){
        returnError("no 'main' section was defined");
    }
    u32 main_sec_i = *main_sec_i_ptr;
    cmp->binary.main_sec = (CompiledSection*)cmp->binary.comp_sec_list.data + main_sec_i;

    // create linked list of CompiledSection where main is the first
    CompiledSection* prev_sec = cmp->binary.main_sec;
    u32 total_size = 0;
    for(u32 i = 0; i < List_len(&cmp->binary.comp_sec_list, CompiledSection); i++){
        CompiledSection* sec = (CompiledSection*)cmp->binary.comp_sec_list.data + i;
        total_size += sec->bytes.size;
        bool is_main_sec = str_equals(sec->name, main_sec_name);
        if(!is_main_sec){
            sec->offset = prev_sec->offset + prev_sec->bytes.size;
        }

        // the section itself is addressable by its name
        ConstDataProps cd = ConstDataProps_construct(sec->name, sec->bytes.size, sec->offset);
        if(!HashMap_tryPush(&cmp->binary.const_data_props_map, cd.name, &cd)){
            returnError("duplicate named data '%s'", str_copy(cd.name).data);
        }

        // data offsets were recorded relative to the section start
        for(u32 j = 0; j < List_len(&sec->const_data_props_list, ConstDataProps); j++){
            cd = ((ConstDataProps*)sec->const_data_props_list.data)[j];
            cd.offset += sec->offset;
            if(!HashMap_tryPush(&cmp->binary.const_data_props_map, cd.name, &cd)){
                returnError("duplicate named data '%s'", str_copy(cd.name).data);
            }
        }

        if(is_main_sec)
            continue;
        prev_sec->next = sec;
        prev_sec = sec;
    }

    // insert calculated offsets into sections
    for(u32 i = 0; i < List_len(&cmp->binary.comp_sec_list, CompiledSection); i++){
        CompiledSection* sec = (CompiledSection*)cmp->binary.comp_sec_list.data + i;
        for(u32 j = 0; j < List_len(&sec->named_refs, NamedRef); j++){
            NamedRef* ref = (NamedRef*)sec->named_refs.data + j;
            ConstDataProps* target_data = HashMap_tryGetPtr(
                &cmp->binary.const_data_props_map, ref->name);
            if(target_data == NULL){
                returnError("can't find named data '%s'", str_copy(ref->name).data);
            }

            u64 ref_value = 0;
            switch(ref->type){
                default:
                    returnError("invalid NamedRefType %i", ref->type);
                case NamedRefType_Size:
                    ref_value = target_data->size;
                    break;
                case NamedRefType_Ptr:
                    ref_value = target_data->offset;
                    break;
            }
            // the destination is an arbitrary position inside a byte buffer,
            // so it is written with memcpy instead of an (unaligned) u64 store
            u8* ref_value_dst = (u8*)sec->bytes.data + ref->offset;
            memcpy(ref_value_dst, &ref_value, sizeof(ref_value));
        }
    }

    cmp->binary.total_size = total_size;
    return true;
}

// Writes the bytes of every section in the linked list that starts at main_sec to f.
// Requires cmp->state == CompilerState_Compiling.
// Returns false through returnError when a section can't be written completely.
static bool writeBinaryFile(Compiler* cmp, FILE* f){
    returnErrorIf_auto(cmp->state != CompilerState_Compiling);

    CompiledSection* sec = cmp->binary.main_sec;
    while(sec){
        size_t written = fwrite(sec->bytes.data, 1, sec->bytes.size, f);
        if(written != sec->bytes.size){
            returnError("can't write compiled code to the output file");
        }
        fflush(f);
        sec = sec->next;
    }

    //TODO: print warnings for unused sections
    return true;
}

// Reads source_file_name, runs lexing, parsing and binary compilation
// and writes the result to out_file_name.
// When debug_log is true, the source, line lengths, tokens, AST
// and compiled sections are printed to stdout.
// Returns true on success (cmp->state becomes CompilerState_Success).
// On failure returns false; the error is set via returnError or by the failed stage.
bool Compiler_compile(Compiler* cmp, cstr source_file_name, cstr out_file_name, bool debug_log){
    FILE* f = fopen(source_file_name, "rb");
    if(f == NULL){
        returnError("ERROR: can't open file '%s'", source_file_name);
    }

    // read the whole source file
    StringBuilder sb = StringBuilder_alloc(64 * 1024);
    int ret;
    while((ret = fgetc(f)) != EOF) {
        StringBuilder_append_char(&sb, ret);
    }
    if(ferror(f)){
        StringBuilder_destroy(&sb);
        fclose(f);
        returnError("can't read file '%s'", source_file_name);
    }
    fclose(f);

    if(sb.buffer.size == 0){
        StringBuilder_destroy(&sb);
        returnError("source file is empty");
    }
    cmp->code = str_copy(StringBuilder_getStr(&sb));
    StringBuilder_destroy(&sb);

    f = fopen(out_file_name, "wb");
    if(f == NULL){
        returnError("ERROR: can't open file '%s'", out_file_name);
    }

    if(debug_log){
        printf("===========================[%s]===========================\n", source_file_name);
        fputs(cmp->code.data, stdout);
        fputc('\n', stdout);
    }

    if(debug_log)
        printf("===================================[lexing]===================================\n");
    bool success = Compiler_lex(cmp);

    if(debug_log){
        printf("------------------------------------[lines]------------------------------------\n");
        for(u32 i = 0; i < List_len(&cmp->line_lengths, u32); i++){
            printf("[%u] length: %u\n", i+1, ((u32*)cmp->line_lengths.data)[i]);
        }

        printf("------------------------------------[tokens]-----------------------------------\n");
        for(u32 i = 0; i < List_len(&cmp->tokens, Token); i++){
            Token t = ((Token*)cmp->tokens.data)[i];
            CodePos pos = Compiler_getLineAndColumn(cmp, t.begin);
            // print the token text directly from the source buffer;
            // a fixed-size temporary copy would overflow on long tokens
            const char* tok_text = (const char*)(cmp->code.data + t.begin);
            u32 tok_len = t.length;
            // skip leading line breaks to keep the log on one line
            while(tok_len > 0 && (*tok_text == '\r' || *tok_text == '\n')){
                tok_text++;
                tok_len--;
            }
            printf("[l:%3u, c:%3u] %s '%.*s'\n",
                pos.line, pos.column,
                TokenType_toString(t.type).data,
                (int)tok_len, tok_text);
        }
    }
    if(!success){
        fclose(f);
        return false;
    }

    if(debug_log)
        printf("===================================[parsing]===================================\n");
    success = Compiler_parse(cmp);

    if(debug_log){
        printf("-------------------------------------[AST]-------------------------------------\n");
        for(u32 i = 0; i < List_len(&cmp->ast.sections, Section); i++){
            Section* sec = (Section*)cmp->ast.sections.data + i;
            str tmpstr = str_copy(sec->name);
            printf("section '%s'\n", tmpstr.data);
            free(tmpstr.data);

            for(u32 j = 0; j < List_len(&sec->data_definitions_list, DataDefinition); j++){
                DataDefinition* dd = (DataDefinition*)sec->data_definitions_list.data + j;
                tmpstr = str_copy(dd->name);
                printf(" const%u %s (len %u)\n",
                    dd->element_size * 8, tmpstr.data,
                    dd->data_bytes.size/dd->element_size);
                free(tmpstr.data);
            }

            for(u32 j = 0; j < List_len(&sec->operations_list, Operation); j++){
                Operation* op = (Operation*)sec->operations_list.data + j;
                const Instruction* instr = Instruction_getByOpcode(op->opcode);
                if(instr == NULL){
                    fclose(f);
                    returnError("unknown opcode: %i", op->opcode);
                }

                printf(" %s", instr->name.data);
                for(u32 k = 0; k < List_len(&op->args, Argument); k++){
                    Argument* arg = (Argument*)op->args.data + k;
                    printf(" %s(", ArgumentType_toString(arg->type).data);
                    switch(arg->type){
                        default:
                            fclose(f);
                            returnError("invalid argument type %i", arg->type);
                        case ArgumentType_Register: {
                            str register_name = RegisterCode_toString(arg->value.register_code);
                            printf("%s 0x%x", register_name.data, arg->value.register_code);
                            free(register_name.data);
                            break;
                        }
                        case ArgumentType_ConstValue:
                            printf(IFWIN("%lli", "%li"), arg->value.i);
                            break;
                        case ArgumentType_ConstDataPointer:
                            tmpstr = str_copy(arg->value.data_name);
                            printf("@%s", tmpstr.data);
                            free(tmpstr.data);
                            break;
                        case ArgumentType_ConstDataSize:
                            tmpstr = str_copy(arg->value.data_name);
                            printf("#%s", tmpstr.data);
                            free(tmpstr.data);
                            break;
                        case ArgumentType_VarDataName:
                            tmpstr = str_copy(arg->value.data_name);
                            printf("%s", tmpstr.data);
                            free(tmpstr.data);
                            break;
                    }
                    printf(")");
                }
                printf("\n");
            }
        }
    }
    if(!success){
        fclose(f);
        return false;
    }

    if(debug_log)
        printf("==================================[compiling]==================================\n");
    success = compileBinary(cmp);

    if(debug_log){
        for(u32 i = 0; i < List_len(&cmp->binary.comp_sec_list, CompiledSection); i++){
            CompiledSection* sec = (CompiledSection*)cmp->binary.comp_sec_list.data + i;
            str tmpstr = str_copy(sec->name);
            printf("compiled section '%s' to %u bytes with offset 0x%x\n",
                tmpstr.data, sec->bytes.size, sec->offset);
            free(tmpstr.data);
        }
    }
    if(!success){
        fclose(f);
        return false;
    }

    if(debug_log)
        printf("----------------------------[writing output to file]---------------------------\n");
    success = writeBinaryFile(cmp, f);
    // fclose flushes buffered data, so its failure means the output is incomplete
    bool closed = fclose(f) == 0;
    if(success && !closed){
        returnError("can't write file '%s'", out_file_name);
    }

    if(success){
        cmp->state = CompilerState_Success;
    }
    return success;
}