#include "Compiler_internal.h"

// All error macros below expect a `Compiler* cmp` variable in the calling scope.
// They are wrapped in do/while(0) so that they behave as a single statement.

// Reports a formatted error positioned at the beginning of the current token.
// `cmp->tok_i` must be a valid index into `cmp->tokens` when this is used.
#define setError(FORMAT, ...) do {\
    cmp->pos = cmp->tokens.data[cmp->tok_i].begin;\
    Compiler_setError(cmp, FORMAT, ##__VA_ARGS__);\
} while(0)

// Reports token T as unexpected, positioned at the beginning of T.
#define setError_unexpectedToken(T) do {\
    str tok_str_tmp = Compiler_constructTokenStr(cmp, (T));\
    cmp->pos = (T).begin;\
    Compiler_setError(cmp, "unexpected token '"FMT_str"'", tok_str_tmp.len, tok_str_tmp.data);\
} while(0)

// Reports the source character located I characters after the beginning of token T.
#define setError_unexpectedTokenChar(T, I) do {\
    cmp->pos = (T).begin + (I);\
    Compiler_setError(cmp, "unexpected character '%c'", cmp->code.data[cmp->pos]);\
} while(0)

// Reports token T as an unknown instruction, positioned at the beginning of T.
#define setError_unexpectedInstruction(T) do {\
    str tok_str_tmp = Compiler_constructTokenStr(cmp, (T));\
    cmp->pos = (T).begin;\
    Compiler_setError(cmp, "unexpected instruction '"FMT_str"'", tok_str_tmp.len, tok_str_tmp.data);\
} while(0)

#define Error_TokenUnset "token of undefined type"
#define Error_BitSize "invalid size in bits"
#define Error_FloatSize "can't fit floating point number in %u bit variable"

// Returns true if B is a supported variable width in bits (8, 16, 32 or 64).
static inline bool isVarSizeBits(u32 B) {
    return (B == 8 || B == 16 || B == 32 || B == 64);
}

// Returns true if the least significant byte of a multi-byte integer
// is stored first on the machine running the compiler.
static inline bool isHostLittleEndian(void){
    const u32 probe = 1;
    return *(const u8*)&probe == 1;
}

// Replaces backslash escape sequences in `src` with the characters they denote.
// Recognized sequences: \\ \0 \n \r \t \e \" \'
// Returns a newly built string on success.
// On an unknown sequence sets a compiler error and returns str_null.
// NOTE(review): a single backslash at the very end of `src` is silently dropped.
static NULLABLE(str) resolveEscapeSequences(Compiler* cmp, str src){
    StringBuilder sb = StringBuilder_alloc(src.len);
    bool escaped = false;
    for(u32 i = 0; i < src.len; i++){
        char c = src.data[i];

        if(!escaped){
            if(c == '\\')
                escaped = true; // the next character selects the escape code
            else StringBuilder_append_char(&sb, c);
            continue;
        }

        // c is the character following a backslash
        switch(c){
            case '0':
                StringBuilder_append_char(&sb, '\0');
                break;
            case 'n':
                StringBuilder_append_char(&sb, '\n');
                break;
            case 'r':
                StringBuilder_append_char(&sb, '\r');
                break;
            case 't':
                StringBuilder_append_char(&sb, '\t');
                break;
            case 'e':
                // ESC; spelled as hex because '\e' is a compiler extension
                StringBuilder_append_char(&sb, '\x1b');
                break;
            case '\\':
            case '"':
            case '\'':
                StringBuilder_append_char(&sb, c);
                break;
            default:
                // NOTE(review): `i` is an offset into `src`, while the position is computed
                // from the beginning of the current token. Both callers in this file pass
                // the token text without its opening quote, so the reported position seems
                // to land one character before src.data[i] - confirm this is intended.
                setError_unexpectedTokenChar(cmp->tokens.data[cmp->tok_i], i);
                StringBuilder_destroy(&sb);
                return str_null;
        }
        escaped = false;
    }
    return StringBuilder_getStr(&sb);
}

// Appends the `ddf->element_size` low-order bytes of `value` in host byte order.
static void pushIntegerBytes(DataDefinition* ddf, i64 value){
    u8* value_part = (u8*)&value;
    // low-order bytes are at the end of the object only on big-endian hosts
    if(!isHostLittleEndian())
        value_part += sizeof(value) - ddf->element_size;
    List_u8_pushMany(&ddf->data_bytes, value_part, ddf->element_size);
}

// Appends `value` as a floating point number of `ddf->element_size` bytes.
// Returns false without appending anything if there is no
// floating point type of that size (only `f64` and `float` are supported).
static bool pushFloatBytes(DataDefinition* ddf, f64 value){
    if(ddf->element_size == sizeof(value)){
        List_u8_pushMany(&ddf->data_bytes, (u8*)&value, sizeof(value));
        return true;
    }
    if(ddf->element_size == sizeof(float)){
        // slicing bytes out of a 64-bit float does not produce a valid 32-bit float,
        // the value has to be converted
        float narrowed = (float)value;
        List_u8_pushMany(&ddf->data_bytes, (u8*)&narrowed, sizeof(narrowed));
        return true;
    }
    return false;
}

// Parses a data definition: `const<bits> <name> <values...>` up to the end of operation.
// Values may be numbers, chars and strings; their bytes are appended to `ddf->data_bytes`.
//   instr_name - text of the instruction token at `cmp->tok_i`, starts with "const"
//   ddf        - zeroed definition to fill; `ddf->name` refers to the text
//                returned by Compiler_constructTokenStr, no copy is made
// On return `cmp->tok_i` is the index of the operation end token,
// or `cmp->tokens.len` if tokens ran out. Errors are reported through `cmp`.
static void parseDataDefinition(Compiler* cmp, str instr_name, DataDefinition* ddf){
    i32 element_size_bits;
    // the copy is what gets passed to sscanf, which needs a terminated C string
    // (presumably what str_copy provides - verify)
    str instr_name_zero_terminated = str_copy(instr_name);
    bool size_is_valid =
        sscanf(instr_name_zero_terminated.data, "const%i", &element_size_bits) == 1
        && isVarSizeBits(element_size_bits);
    str_destroy(instr_name_zero_terminated);
    if(!size_is_valid){
        setError(Error_BitSize);
        return;
    }

    ddf->element_size = element_size_bits / 8;
    ddf->data_bytes = List_u8_alloc(32);

    // the instruction must be followed by the name of the data
    if(cmp->tok_i + 1 >= cmp->tokens.len){
        // reported before moving tok_i, because setError reads the current token
        setError("expected data name, got end of file");
        return;
    }
    Token tok = cmp->tokens.data[++cmp->tok_i];
    if(tok.type != TokenType_Name){
        setError_unexpectedToken(tok);
        return;
    }

    str tok_str = Compiler_constructTokenStr(cmp, tok);
    str processed_str = str_null;
    ddf->name = tok_str;

    while(++cmp->tok_i < cmp->tokens.len){
        tok = cmp->tokens.data[cmp->tok_i];
        switch(tok.type){
            case TokenType_SingleLineComment:
            case TokenType_MultiLineComment:
                // skip comments
                break;

            case TokenType_OperationEnd:
                return;

            case TokenType_Unset:
                setError(Error_TokenUnset);
                return;

            case TokenType_Number: {
                tok_str = Compiler_constructTokenStr(cmp, tok);
                processed_str = str_copy(tok_str);
                bool pushed = true;
                // a number containing '.' is stored as floating point
                if(str_seekChar(tok_str, '.', 0) != -1){
                    pushed = pushFloatBytes(ddf, atof(processed_str.data));
                }
                else {
                    pushIntegerBytes(ddf, atoll(processed_str.data));
                }
                str_destroy(processed_str);
                if(!pushed){
                    setError(Error_FloatSize, (u32)element_size_bits);
                    return;
                }
                break;
            }

            case TokenType_Char:
                // cut the quotes off
                tok.begin += 1;
                tok.length -= 2;
                tok_str = Compiler_constructTokenStr(cmp, tok);
                processed_str = resolveEscapeSequences(cmp, tok_str);
                if(cmp->state == CompilerState_Error){
                    // keep the error set by resolveEscapeSequences
                    str_destroy(processed_str);
                    return;
                }
                if(processed_str.len != ddf->element_size){
                    // the message reads processed_str, so it is destroyed afterwards
                    setError("can't fit char of size %i in %u bit variable",
                        (i32)processed_str.len, (u32)element_size_bits);
                    str_destroy(processed_str);
                    return;
                }
                List_u8_pushMany(&ddf->data_bytes, (u8*)processed_str.data, processed_str.len);
                str_destroy(processed_str);
                break;

            case TokenType_String:
                // cut the quotes off
                tok.begin += 1;
                tok.length -= 2;
                tok_str = Compiler_constructTokenStr(cmp, tok);
                processed_str = resolveEscapeSequences(cmp, tok_str);
                if(cmp->state == CompilerState_Error){
                    str_destroy(processed_str);
                    return;
                }
                List_u8_pushMany(&ddf->data_bytes, (u8*)processed_str.data, processed_str.len);
                str_destroy(processed_str);
                break;

            default:
                setError_unexpectedToken(tok);
                return;
        }
    }
}

// Parses one operation: an instruction followed by its arguments up to the end of operation.
//   instr_name - text of the instruction token at `cmp->tok_i`
//   operPtr    - zeroed operation to fill with the opcode and the list of arguments
// Arguments that hold names refer to the text returned by Compiler_constructTokenStr,
// no copy is made.
// On return `cmp->tok_i` is the index of the operation end token,
// or `cmp->tokens.len` if tokens ran out. Errors are reported through `cmp`.
static void parseOperation(Compiler* cmp, str instr_name, Operation* operPtr){
    Token tok = cmp->tokens.data[cmp->tok_i];
    const Instruction* instr = Instruction_getByName(instr_name);
    if(instr == NULL){
        setError_unexpectedInstruction(tok);
        return;
    }

    operPtr->opcode = instr->opcode;
    operPtr->args = List_Argument_alloc(4);

    Argument arg = (Argument){ .type = ArgumentType_Unset, .value.i = 0 };
    str tok_str = str_null;
    str processed_str = str_null;

    while(++cmp->tok_i < cmp->tokens.len){
        tok = cmp->tokens.data[cmp->tok_i];
        switch(tok.type){
            case TokenType_SingleLineComment:
            case TokenType_MultiLineComment:
                // skip comments
                break;

            case TokenType_OperationEnd:
                return;

            case TokenType_Unset:
                setError(Error_TokenUnset);
                return;

            case TokenType_Number:
                arg.type = ArgumentType_ConstValue;
                tok_str = Compiler_constructTokenStr(cmp, tok);
                processed_str = str_copy(tok_str);
                // a number containing '.' is stored as floating point
                if(str_seekChar(tok_str, '.', 0) != -1){
                    arg.value.f = atof(processed_str.data);
                }
                else {
                    arg.value.i = atoll(processed_str.data);
                }
                str_destroy(processed_str);
                List_Argument_push(&operPtr->args, arg);
                break;

            case TokenType_Name:
                tok_str = Compiler_constructTokenStr(cmp, tok);
                // a name is either a register or the name of some data
                arg.value.register_code = RegisterCode_parse(tok_str);
                if(arg.value.register_code != RegisterCode_Unset){
                    arg.type = ArgumentType_Register;
                }
                else {
                    arg.type = ArgumentType_VarDataName;
                    arg.value.data_name = tok_str;
                }
                List_Argument_push(&operPtr->args, arg);
                break;

            case TokenType_NamedDataPointer:
                tok_str = Compiler_constructTokenStr(cmp, tok);
                // drop the first character of the token, only the name is stored
                tok_str.data++;
                tok_str.len--;
                arg.type = ArgumentType_ConstDataPointer;
                arg.value.data_name = tok_str;
                List_Argument_push(&operPtr->args, arg);
                break;

            case TokenType_NamedDataSize:
                tok_str = Compiler_constructTokenStr(cmp, tok);
                // drop the first character of the token, only the name is stored
                tok_str.data++;
                tok_str.len--;
                arg.type = ArgumentType_ConstDataSize;
                arg.value.data_name = tok_str;
                List_Argument_push(&operPtr->args, arg);
                break;

            default:
                setError_unexpectedToken(tok);
                return;
        }
    }
}

// Builds `cmp->ast` from `cmp->tokens`.
// A label starts a new section. Every instruction is added to the most recent section:
// as a data definition if its name starts with "const", as an operation otherwise.
// Must be called when `cmp->state` is CompilerState_Lexing.
// Returns true on success, false if an error has been reported.
bool Compiler_parse(Compiler* cmp){
    returnErrorIf_auto(cmp->state != CompilerState_Lexing);
    cmp->state = CompilerState_Parsing;

    Token tok;
    Section* sec = NULL;
    while(cmp->tok_i < cmp->tokens.len){
        tok = cmp->tokens.data[cmp->tok_i];
        switch(tok.type){
            case TokenType_Unset:
                returnError(Error_TokenUnset);

            case TokenType_SingleLineComment:
            case TokenType_MultiLineComment:
                // skip comments
                break;

            case TokenType_Label:
                // create new section
                sec = List_Section_expand(&cmp->ast.sections, 1);
                Section_construct(sec, Compiler_constructTokenStr(cmp, tok));
                break;

            case TokenType_Instruction: {
                if(sec == NULL)
                    returnError("no section");
                str instr_name = Compiler_constructTokenStr(cmp, tok);
                // data definition starts with const
                if(str_startsWith(instr_name, STR("const"))){
                    DataDefinition* dataDefPtr = List_DataDefinition_expand(&sec->data_definitions_list, 1);
                    memset(dataDefPtr, 0, sizeof(DataDefinition));
                    parseDataDefinition(cmp, instr_name, dataDefPtr);
                }
                else {
                    Operation* operPtr = List_Operation_expand(&sec->operations_list, 1);
                    memset(operPtr, 0, sizeof(Operation));
                    parseOperation(cmp, instr_name, operPtr);
                }
                break;
            }

            default:
                setError_unexpectedToken(tok);
                return false;
        }

        // the parse functions report failures only through the compiler state
        if(cmp->state == CompilerState_Error)
            return false;
        // step over the token that has been handled
        // (after an instruction this is its operation end token)
        cmp->tok_i++;
    }

    return true;
}