diff --git a/src/compiler/AST.h b/src/compiler/AST.h index f3804bc..3216488 100644 --- a/src/compiler/AST.h +++ b/src/compiler/AST.h @@ -32,10 +32,9 @@ List_declare(Operation); typedef struct DataDefinition { - u32 element_size; - u32 count; cstr name; - void* data; + List_u8 data; + u32 element_size; } DataDefinition; List_declare(DataDefinition); diff --git a/src/compiler/Lexer.c b/src/compiler/Lexer.c index 427cc8f..dda035b 100644 --- a/src/compiler/Lexer.c +++ b/src/compiler/Lexer.c @@ -139,7 +139,7 @@ static void readArguments(Compiler* cmp){ } } - // end of line + // end of operation else if(c == '\r' || c == '\n' || c == ';'){ tok.length = cmp->pos - tok.begin; if(tok.length > 0) @@ -195,6 +195,8 @@ static void readInstruction(Compiler* cmp){ if(c == '\r' || c == '\n' || c == ';'){ tok.length = cmp->pos - tok.begin; List_Token_push(&cmp->tokens, tok); + tok = Token_construct(TokenType_OperationEnd, cmp->pos, 1); + List_Token_push(&cmp->tokens, tok); // cmp->line will be increased in lex() return; } @@ -204,6 +206,8 @@ static void readInstruction(Compiler* cmp){ tok.length = cmp->pos - tok.begin; List_Token_push(&cmp->tokens, tok); readArguments(cmp); + tok = Token_construct(TokenType_OperationEnd, cmp->pos, 1); + List_Token_push(&cmp->tokens, tok); return; } @@ -219,6 +223,8 @@ static void readInstruction(Compiler* cmp){ // end of file tok.length = cmp->pos - tok.begin; List_Token_push(&cmp->tokens, tok); + tok = Token_construct(TokenType_OperationEnd, cmp->pos, 1); + List_Token_push(&cmp->tokens, tok); } bool Compiler_lex(Compiler* cmp){ diff --git a/src/compiler/Parser.c b/src/compiler/Parser.c index ebf366b..c0b591b 100644 --- a/src/compiler/Parser.c +++ b/src/compiler/Parser.c @@ -7,13 +7,136 @@ #define setError_unexpectedToken(T) {\ char* tok_str = Compiler_extractTokenStr(cmp, T);\ - setError("unexpected character '%s'", tok_str);\ + cmp->pos = T.begin;\ + Compiler_setError(cmp, "unexpected token '%s'", tok_str);\ free(tok_str);\ } +#define setError_unexpectedTokenChar(T, I) {\ + cmp->pos = T.begin + I;\ + Compiler_setError(cmp, "unexpected token '%c'", cmp->code[cmp->pos]);\ +} -static void parseDataDefinition(Compiler* cmp, char* instr_name, DataDefinition* dataDefPtr){ +#define Error_TokenUnset "token of undefined type" +#define Error_BitSize "invalid size in bits" + +static void List_u8_pushBytes(List_u8* l, void* value, u32 startIndex, u32 count){ + u8* v = value; + for(u32 byte_i = startIndex; byte_i < startIndex + count; byte_i++){ + List_u8_push(l, v[byte_i]); + } +} + +static inline bool isVarSizeBits(u32 B) { return (B == 8 && B == 16 && B == 32 && B == 64); } + +static NULLABLE(u8*) resolveEscapeSequences(Compiler* cmp, cstr src){ + u32 len = strlen(src); + List_u8 resolved = List_u8_alloc(len); + char c; + bool escaped = false; + for(u32 i = 0; i < len; i++){ + c = src[i]; + if(c == '\\'){ + escaped = !escaped; + continue; + } + + if(!escaped){ + List_u8_push(&resolved, c); + continue; + } + + // escape codes + switch(c){ + case '0': + List_u8_push(&resolved, '\0'); + break; + case 'n': + List_u8_push(&resolved, '\n'); + break; + case 'r': + List_u8_push(&resolved, '\r'); + break; + case 't': + List_u8_push(&resolved, '\t'); + break; + case 'e': + List_u8_push(&resolved, '\e'); + break; + default: + setError_unexpectedTokenChar(cmp->tokens.data[cmp->tok_i], i); + free(resolved.data); + return NULL; + } + } + + return resolved.data; +} + +static void parseDataDefinition(Compiler* cmp, char* instr_name, DataDefinition* ddf){ + i32 _element_size_bits; + if(sscanf(instr_name, "const%i", &_element_size_bits) != 1 || !isVarSizeBits(_element_size_bits)){ + setError(Error_BitSize); + return; + } + ddf->element_size = _element_size_bits / 8; + + Token tok = cmp->tokens.data[++cmp->tok_i]; + char* tok_str = Compiler_extractTokenStr(cmp, tok); + u8* processed_str = NULL; + i32 len = 0; + ddf->name = tok_str; + + while(++cmp->tok_i < cmp->tokens.len){ + switch(tok.type){ + case TokenType_Unset: + setError(Error_TokenUnset); + return; + case TokenType_SingleLineComment: + case TokenType_MultiLineComment: + // skip comments + break; + case TokenType_Number: + tok_str = Compiler_extractTokenStr(cmp, tok); + if(cstr_seekChar(tok_str, '.', 0, -1) != -1){ + f64 f = atof(tok_str); + List_u8_pushBytes(&ddf->data, &f, 8 - ddf->element_size, ddf->element_size); + } + else { + i64 i = atoll(tok_str); + List_u8_pushBytes(&ddf->data, &i, 8 - ddf->element_size, ddf->element_size); + } + break; + case TokenType_Char: + tok.begin += 1; + tok.length -= 2; + tok_str = Compiler_extractTokenStr(cmp, tok); + processed_str = resolveEscapeSequences(cmp, tok_str); + free(tok_str); + len = strlen(processed_str); + if(len != ddf->element_size){ + setError("can't fit char of size %i in %u bit variable", len, _element_size_bits); + return; + } + List_u8_pushBytes(&ddf->data, processed_str, 0, len); + break; + case TokenType_String: + tok.begin += 1; + tok.length -= 2; + tok_str = Compiler_extractTokenStr(cmp, tok); + processed_str = resolveEscapeSequences(cmp, tok_str); + free(tok_str); + len = strlen(processed_str); + List_u8_pushBytes(&ddf->data, processed_str, 0, len); + break; + case TokenType_OperationEnd: + return; + default: + setError_unexpectedToken(tok); + return; + } + } } @@ -31,7 +154,7 @@ bool Compiler_parse(Compiler* cmp){ tok = cmp->tokens.data[cmp->tok_i]; switch(tok.type){ case TokenType_Unset: - returnError("token of undefined type"); + returnError(Error_TokenUnset); case TokenType_SingleLineComment: case TokenType_MultiLineComment: // skip comments @@ -42,6 +165,8 @@ bool Compiler_parse(Compiler* cmp){ Section_init(sec, Compiler_extractTokenStr(cmp, tok)); break; case TokenType_Instruction: + if(sec == NULL) + returnError("no section"); char* instr_name = Compiler_extractTokenStr(cmp, tok); // data definition starts with const if(cstr_seek(instr_name, "const", 0, 1)){ diff --git a/src/compiler/Token.h b/src/compiler/Token.h index 2a4faad..082998b 100644 --- a/src/compiler/Token.h +++ b/src/compiler/Token.h @@ -13,7 +13,8 @@ typedef enum TokenType { TokenType_String, // "aaaa" TokenType_Name, // xyz TokenType_NamedDataPointer, // @xyz - TokenType_NamedDataSize // #xyz + TokenType_NamedDataSize, // #xyz + TokenType_OperationEnd, // EOL or EOF or ; } TokenType; cstr TokenType_toString(TokenType t); @@ -26,4 +27,4 @@ typedef struct Token { List_declare(Token); -#define Token_construct(TYPE, BEGIN, END) ((Token){ .type = TYPE, .begin = BEGIN, .length = END }) +#define Token_construct(TYPE, BEGIN, LEN) ((Token){ .type = TYPE, .begin = BEGIN, .length = LEN }) diff --git a/src/compiler/parser.c b/src/compiler/parser.c index ebf366b..c0b591b 100644 --- a/src/compiler/parser.c +++ b/src/compiler/parser.c @@ -7,13 +7,136 @@ #define setError_unexpectedToken(T) {\ char* tok_str = Compiler_extractTokenStr(cmp, T);\ - setError("unexpected character '%s'", tok_str);\ + cmp->pos = T.begin;\ + Compiler_setError(cmp, "unexpected token '%s'", tok_str);\ free(tok_str);\ } +#define setError_unexpectedTokenChar(T, I) {\ + cmp->pos = T.begin + I;\ + Compiler_setError(cmp, "unexpected token '%c'", cmp->code[cmp->pos]);\ +} -static void parseDataDefinition(Compiler* cmp, char* instr_name, DataDefinition* dataDefPtr){ +#define Error_TokenUnset "token of undefined type" +#define Error_BitSize "invalid size in bits" + +static void List_u8_pushBytes(List_u8* l, void* value, u32 startIndex, u32 count){ + u8* v = value; + for(u32 byte_i = startIndex; byte_i < startIndex + count; byte_i++){ + List_u8_push(l, v[byte_i]); + } +} + +static inline bool isVarSizeBits(u32 B) { return (B == 8 && B == 16 && B == 32 && B == 64); } + +static NULLABLE(u8*) resolveEscapeSequences(Compiler* cmp, cstr src){ + u32 len = strlen(src); + List_u8 resolved = List_u8_alloc(len); + char c; + bool escaped = false; + for(u32 i = 0; i < len; i++){ + c = src[i]; + if(c == '\\'){ + escaped = !escaped; + continue; + } + + if(!escaped){ + List_u8_push(&resolved, c); + continue; + } + + // escape codes + switch(c){ + case '0': + List_u8_push(&resolved, '\0'); + break; + case 'n': + List_u8_push(&resolved, '\n'); + break; + case 'r': + List_u8_push(&resolved, '\r'); + break; + case 't': + List_u8_push(&resolved, '\t'); + break; + case 'e': + List_u8_push(&resolved, '\e'); + break; + default: + setError_unexpectedTokenChar(cmp->tokens.data[cmp->tok_i], i); + free(resolved.data); + return NULL; + } + } + + return resolved.data; +} + +static void parseDataDefinition(Compiler* cmp, char* instr_name, DataDefinition* ddf){ + i32 _element_size_bits; + if(sscanf(instr_name, "const%i", &_element_size_bits) != 1 || !isVarSizeBits(_element_size_bits)){ + setError(Error_BitSize); + return; + } + ddf->element_size = _element_size_bits / 8; + + Token tok = cmp->tokens.data[++cmp->tok_i]; + char* tok_str = Compiler_extractTokenStr(cmp, tok); + u8* processed_str = NULL; + i32 len = 0; + ddf->name = tok_str; + + while(++cmp->tok_i < cmp->tokens.len){ + switch(tok.type){ + case TokenType_Unset: + setError(Error_TokenUnset); + return; + case TokenType_SingleLineComment: + case TokenType_MultiLineComment: + // skip comments + break; + case TokenType_Number: + tok_str = Compiler_extractTokenStr(cmp, tok); + if(cstr_seekChar(tok_str, '.', 0, -1) != -1){ + f64 f = atof(tok_str); + List_u8_pushBytes(&ddf->data, &f, 8 - ddf->element_size, ddf->element_size); + } + else { + i64 i = atoll(tok_str); + List_u8_pushBytes(&ddf->data, &i, 8 - ddf->element_size, ddf->element_size); + } + break; + case TokenType_Char: + tok.begin += 1; + tok.length -= 2; + tok_str = Compiler_extractTokenStr(cmp, tok); + processed_str = resolveEscapeSequences(cmp, tok_str); + free(tok_str); + len = strlen(processed_str); + if(len != ddf->element_size){ + setError("can't fit char of size %i in %u bit variable", len, _element_size_bits); + return; + } + List_u8_pushBytes(&ddf->data, processed_str, 0, len); + break; + case TokenType_String: + tok.begin += 1; + tok.length -= 2; + tok_str = Compiler_extractTokenStr(cmp, tok); + processed_str = resolveEscapeSequences(cmp, tok_str); + free(tok_str); + len = strlen(processed_str); + List_u8_pushBytes(&ddf->data, processed_str, 0, len); + break; + case TokenType_OperationEnd: + return; + default: + setError_unexpectedToken(tok); + return; + } + } } @@ -31,7 +154,7 @@ bool Compiler_parse(Compiler* cmp){ tok = cmp->tokens.data[cmp->tok_i]; switch(tok.type){ case TokenType_Unset: - returnError("token of undefined type"); + returnError(Error_TokenUnset); case TokenType_SingleLineComment: case TokenType_MultiLineComment: // skip comments @@ -42,6 +165,8 @@ bool Compiler_parse(Compiler* cmp){ Section_init(sec, Compiler_extractTokenStr(cmp, tok)); break; case TokenType_Instruction: + if(sec == NULL) + returnError("no section"); char* instr_name = Compiler_extractTokenStr(cmp, tok); // data definition starts with const if(cstr_seek(instr_name, "const", 0, 1)){ diff --git a/src/compiler/token.h b/src/compiler/token.h index 2a4faad..082998b 100644 --- a/src/compiler/token.h +++ b/src/compiler/token.h @@ -13,7 +13,8 @@ typedef enum TokenType { TokenType_String, // "aaaa" TokenType_Name, // xyz TokenType_NamedDataPointer, // @xyz - TokenType_NamedDataSize // #xyz + TokenType_NamedDataSize, // #xyz + TokenType_OperationEnd, // EOL or EOF or ; } TokenType; cstr TokenType_toString(TokenType t); @@ -26,4 +27,4 @@ typedef struct Token { List_declare(Token); -#define Token_construct(TYPE, BEGIN, END) ((Token){ .type = TYPE, .begin = BEGIN, .length = END }) +#define Token_construct(TYPE, BEGIN, LEN) ((Token){ .type = TYPE, .begin = BEGIN, .length = LEN }) diff --git a/src/cstr.c b/src/cstr.c index 37f0a36..2ea7687 100644 --- a/src/cstr.c +++ b/src/cstr.c @@ -89,4 +89,32 @@ i32 cstr_seekReverse(const char* src, const char* fragment, u32 startIndex, u32 } } return -1; -} \ No newline at end of file +} + + +i32 cstr_seekChar(const char* src, char fragment, u32 startIndex, u32 seekLength){ + char sc=*src; + if(sc==0 || fragment==0) + return -1; + for(u32 si=startIndex; si-startIndex inclusion in or -1 if not found i32 cstr_seekReverse(const char* src, const char* fragment, u32 startIndex, u32 seekLength); + +/// @param startIndex 0 ... src length +/// @param seekLength 0 ... -1 +/// @return pos of first inclusion in or -1 if not found +i32 cstr_seekChar(const char* src, char fragment, u32 startIndex, u32 seekLength); + +/// @param startIndex -1 ... src length +/// @param seekLength 0 ... -1 +/// @return pos of first inclusion in or -1 if not found +i32 cstr_seekCharReverse(const char* src, char fragment, u32 startIndex, u32 seekLength);