From 823223ffa7f820ad24b5972a5067a91bcfc91749 Mon Sep 17 00:00:00 2001 From: Timerix Date: Sun, 2 Feb 2025 19:18:01 +0500 Subject: [PATCH] cstr -> str where possible --- src/VM/VM.h | 1 + src/collections/List.c | 1 - src/collections/List.h | 1 - src/compiler/AST.c | 22 ++++----- src/compiler/AST.h | 9 ++-- src/compiler/Compiler.c | 40 +++++++-------- src/compiler/Compiler.h | 4 +- src/compiler/Compiler_internal.h | 3 +- src/compiler/Lexer.c | 38 +++++++-------- src/compiler/Parser.c | 84 ++++++++++++++++---------------- src/compiler/Token.c | 28 +++++------ src/compiler/Token.h | 3 +- src/instructions/instructions.h | 4 +- src/main.c | 2 +- 14 files changed, 119 insertions(+), 121 deletions(-) diff --git a/src/VM/VM.h b/src/VM/VM.h index 19802c7..7222c75 100644 --- a/src/VM/VM.h +++ b/src/VM/VM.h @@ -1,5 +1,6 @@ #pragma once #include "../std.h" +#include "../string/str.h" typedef union Register { u32 u32v; diff --git a/src/collections/List.c b/src/collections/List.c index 6b610a9..2597efb 100644 --- a/src/collections/List.c +++ b/src/collections/List.c @@ -1,5 +1,4 @@ #include "List.h" -List_define(cstr); List_define(u32); List_define(u8); diff --git a/src/collections/List.h b/src/collections/List.h index b72cb8d..545fe7f 100644 --- a/src/collections/List.h +++ b/src/collections/List.h @@ -54,6 +54,5 @@ }\ -List_declare(cstr); List_declare(u32); List_declare(u8); diff --git a/src/compiler/AST.c b/src/compiler/AST.c index f5d4625..b17371c 100644 --- a/src/compiler/AST.c +++ b/src/compiler/AST.c @@ -5,30 +5,30 @@ List_define(Operation); List_define(DataDefinition); List_define(Section); -static cstr _ArgumentType_str[] = { - "Unset", - "Register", - "ConstValue", - "DataName", - "NamedDataPointer", - "NamedDataSize", +static str _ArgumentType_str[] = { + STR("Unset"), + STR("Register"), + STR("ConstValue"), + STR("DataName"), + STR("NamedDataPointer"), + STR("NamedDataSize"), }; -cstr ArgumentType_toString(ArgumentType t){ +str ArgumentType_toString(ArgumentType t){ if(t >= ARRAY_SIZE(_ArgumentType_str)) - return "!!INDEX_ERROR!!"; + return STR("!!INDEX_ERROR!!"); return _ArgumentType_str[t]; } -void Section_init(Section* sec, char* name){ +void Section_init(Section* sec, str name){ sec->name = name; sec->data = List_DataDefinition_alloc(256); sec->code = List_Operation_alloc(1024); } void Section_free(Section* sec){ - free(sec->name); + free(sec->name.data); free(sec->data.data); free(sec->code.data); } diff --git a/src/compiler/AST.h b/src/compiler/AST.h index 3216488..651d961 100644 --- a/src/compiler/AST.h +++ b/src/compiler/AST.h @@ -1,5 +1,6 @@ #pragma once #include "../std.h" +#include "../string/str.h" #include "../instructions/instructions.h" #include "../collections/List.h" @@ -12,7 +13,7 @@ typedef enum ArgumentType { ArgumentType_NamedDataSize, } ArgumentType; -cstr ArgumentType_toString(ArgumentType t); +str ArgumentType_toString(ArgumentType t); typedef struct Argument { @@ -32,7 +33,7 @@ List_declare(Operation); typedef struct DataDefinition { - cstr name; + str name; List_u8 data; u32 element_size; } DataDefinition; @@ -41,14 +42,14 @@ List_declare(DataDefinition); typedef struct Section { - char* name; + str name; List_DataDefinition data; List_Operation code; } Section; List_declare(Section); -void Section_init(Section* Section, char* name); +void Section_init(Section* Section, str name); void Section_free(Section* Section); typedef struct AST { diff --git a/src/compiler/Compiler.c b/src/compiler/Compiler.c index 853a453..f2e4f0d 100644 --- a/src/compiler/Compiler.c +++ b/src/compiler/Compiler.c @@ -1,6 +1,5 @@ #include "Compiler_internal.h" - void Compiler_init(Compiler* cmp){ memset(cmp, 0, sizeof(Compiler)); cmp->state = CompilerState_Initial; @@ -10,7 +9,7 @@ void Compiler_init(Compiler* cmp){ } void Compiler_free(Compiler* cmp){ - free(cmp->code); + free(cmp->code.data); free(cmp->tokens.data); free(cmp->line_lengths.data); AST_free(&cmp->ast); @@ -18,7 +17,7 @@ void Compiler_free(Compiler* cmp){ CodePos Compiler_getLineAndColumn(Compiler* cmp, u32 pos){ u32 prev_lines_len = 0; - if(pos >= cmp->code_len) + if(pos >= cmp->code.len) return CodePos_create(0, 0); for(u32 i = 0; i < cmp->line_lengths.len; i++){ @@ -33,8 +32,8 @@ CodePos Compiler_getLineAndColumn(Compiler* cmp, u32 pos){ void _Compiler_setError(Compiler* cmp, cstr context, cstr format, ...){ // happens at the end of file - if(cmp->pos >= cmp->code_len) - cmp->pos = cmp->code_len - 1; + if(cmp->pos >= cmp->code.len) + cmp->pos = cmp->code.len - 1; char position_str[32]; CodePos code_pos = Compiler_getLineAndColumn(cmp, cmp->pos); sprintf(position_str, "[at %u:%u][", code_pos.line, code_pos.column); @@ -56,10 +55,8 @@ void _Compiler_setError(Compiler* cmp, cstr context, cstr format, ...){ Compiler_setError(cmp, FORMAT, ##__VA_ARGS__);\ } -char* Compiler_extractTokenStr(Compiler* cmp, Token t){ - char* s = malloc(t.length + 1); - memcpy(s, cmp->code, t.length); - s[t.length] = 0; +str Compiler_constructTokenStr(Compiler* cmp, Token t){ + str s = str_construct((char*)(cmp->code.data + t.begin), t.length, false); return s; } @@ -75,37 +72,34 @@ bool Compiler_compile(Compiler* cmp, cstr source_file_name, cstr out_file_name, if(f == NULL) returnError("ERROR: can't open file '%s'", source_file_name); - List_u8 buf = List_u8_alloc(64 * 1024); + StringBuilder sb = StringBuilder_alloc(64 * 1024); int ret; while((ret = fgetc(f)) != EOF) { - List_u8_push(&buf, ret); + StringBuilder_append_char(&sb, ret); } if(ferror(f)){ - free(buf.data); + StringBuilder_free(&sb); fclose(f); returnError("can't read file '%s'", source_file_name); } fclose(f); - if(buf.len == 0){ - free(buf.data); - fclose(f); + if(sb.buffer.len == 0){ + StringBuilder_free(&sb); returnError("soucre file is empty"); } - cmp->code = (char*)buf.data; - cmp->code_len = buf.len; - List_u8_push(&buf, 0); + cmp->code = str_copy(StringBuilder_getStr(&sb)); + StringBuilder_free(&sb); f = fopen(out_file_name, "wb"); if(f == NULL){ - free(buf.data); returnError("ERROR: can't open file '%s'", out_file_name); } if(debug_log){ printf("----------------------------------[%s]---------------------------------\n", source_file_name); - fputs(cmp->code, stdout); + fputs(cmp->code.data, stdout); fputc('\n', stdout); } @@ -120,11 +114,11 @@ bool Compiler_compile(Compiler* cmp, cstr source_file_name, cstr out_file_name, Token t = cmp->tokens.data[i]; CodePos pos = Compiler_getLineAndColumn(cmp, t.begin); char* tokstr = malloc(4096); - strncpy(tokstr, cmp->code + t.begin, t.length); + strncpy(tokstr, cmp->code.data + t.begin, t.length); tokstr[t.length] = 0; printf("[l:%3u, c:%3u] %s '%s'\n", - pos.line, pos.column, - TokenType_toString(t.type), tokstr); + pos.line, pos.column, + TokenType_toString(t.type).data, tokstr); free(tokstr); } } diff --git a/src/compiler/Compiler.h b/src/compiler/Compiler.h index 6124102..26cb6ea 100644 --- a/src/compiler/Compiler.h +++ b/src/compiler/Compiler.h @@ -1,5 +1,6 @@ #pragma once #include "../std.h" +#include "../string/str.h" #include "../collections/List.h" #include "Token.h" #include "AST.h" @@ -14,8 +15,7 @@ typedef enum CompilerState { } CompilerState; typedef struct Compiler { - char* code; - u32 code_len; + str code; u32 column; // > 0 if code parsing started u32 pos; CompilerState state; diff --git a/src/compiler/Compiler_internal.h b/src/compiler/Compiler_internal.h index f3ce11c..c91fe2f 100644 --- a/src/compiler/Compiler_internal.h +++ b/src/compiler/Compiler_internal.h @@ -1,4 +1,5 @@ #include "Compiler.h" +#include "../string/StringBuilder.h" void _Compiler_setError(Compiler* cmp, cstr context, cstr format, ...) __attribute__((__format__(__printf__, 3, 4))); @@ -23,7 +24,7 @@ typedef struct CodePos { /// @param pos index in code buffer CodePos Compiler_getLineAndColumn(Compiler* cmp, u32 pos); -char* Compiler_extractTokenStr(Compiler* cmp, Token t); +str Compiler_constructTokenStr(Compiler* cmp, Token t); bool Compiler_lex(Compiler* cmp); bool Compiler_parse(Compiler* cmp); diff --git a/src/compiler/Lexer.c b/src/compiler/Lexer.c index dda035b..b0043bc 100644 --- a/src/compiler/Lexer.c +++ b/src/compiler/Lexer.c @@ -19,8 +19,8 @@ static void readCommentSingleLine(Compiler* cmp){ cmp->column++; cmp->pos++; - while(cmp->pos < cmp->code_len){ - c = cmp->code[cmp->pos]; + while(cmp->pos < cmp->code.len){ + c = cmp->code.data[cmp->pos]; // end of line if(c == '\r' || c == '\n'){ tok.length = cmp->pos - tok.begin; @@ -44,10 +44,10 @@ static void readCommentMultiLine(Compiler* cmp){ cmp->column++; cmp->pos++; - while(cmp->pos < cmp->code_len){ - c = cmp->code[cmp->pos]; + while(cmp->pos < cmp->code.len){ + c = cmp->code.data[cmp->pos]; // closing comment - if(cmp->pos > tok.begin + 3 && c == '/' && cmp->code[cmp->pos - 1] == '*') { + if(cmp->pos > tok.begin + 3 && c == '/' && cmp->code.data[cmp->pos - 1] == '*') { tok.length = cmp->pos - tok.begin + 1; List_Token_push(&cmp->tokens, tok); return; @@ -65,14 +65,14 @@ static void readCommentMultiLine(Compiler* cmp){ static void readComment(Compiler* cmp){ char c; // '/' - if(cmp->pos + 1 == cmp->code_len){ + if(cmp->pos + 1 == cmp->code.len){ setError(Error_endOfFile); return; } - c = cmp->code[cmp->pos + 1]; + c = cmp->code.data[cmp->pos + 1]; if(c == '\r' || c == '\n'){ - setError(Error_unexpectedCharacter(cmp->code[--cmp->pos])); + setError(Error_unexpectedCharacter(cmp->code.data[--cmp->pos])); return; } @@ -91,14 +91,14 @@ static void readLabel(Compiler* cmp){ cmp->column++; Token tok = Token_construct(TokenType_Label, cmp->pos, 0); - while(cmp->pos < cmp->code_len){ - c = cmp->code[cmp->pos]; + while(cmp->pos < cmp->code.len){ + c = cmp->code.data[cmp->pos]; // end of line if(c == ':' || c == '\r' || c == '\n'){ tok.length = cmp->pos - tok.begin; if(tok.length > 0) List_Token_push(&cmp->tokens, tok); - else setError(Error_unexpectedCharacter(cmp->code[--cmp->pos])); + else setError(Error_unexpectedCharacter(cmp->code.data[--cmp->pos])); // cmp->line will be increased in lex() return; } @@ -125,12 +125,12 @@ static void readArguments(Compiler* cmp){ Token tok = Token_construct(TokenType_Unset, cmp->pos, 0); char quot = '\0'; // quotation character of a string value - while(cmp->pos < cmp->code_len){ - c = cmp->code[cmp->pos]; + while(cmp->pos < cmp->code.len){ + c = cmp->code.data[cmp->pos]; // string argument reading if(quot != '\0'){ - if(c == quot && cmp->code[cmp->pos - 1] != '\\'){ + if(c == quot && cmp->code.data[cmp->pos - 1] != '\\'){ quot = '\0'; } else if(c == '\r' || c == '\n'){ @@ -189,8 +189,8 @@ static void readInstruction(Compiler* cmp){ cmp->pos++; cmp->column++; - while(cmp->pos < cmp->code_len){ - char c = cmp->code[cmp->pos]; + while(cmp->pos < cmp->code.len){ + char c = cmp->code.data[cmp->pos]; // end of line if(c == '\r' || c == '\n' || c == ';'){ tok.length = cmp->pos - tok.begin; @@ -232,8 +232,8 @@ bool Compiler_lex(Compiler* cmp){ cmp->state = CompilerState_Lexing; cmp->column = 1; - while(cmp->pos < cmp->code_len){ - char c = cmp->code[cmp->pos]; + while(cmp->pos < cmp->code.len){ + char c = cmp->code.data[cmp->pos]; switch(c){ // skip blank characters case ' ': case '\t': case '\r': case '\n': @@ -257,7 +257,7 @@ bool Compiler_lex(Compiler* cmp){ if(cmp->state == CompilerState_Error) return false; - c = cmp->code[cmp->pos]; + c = cmp->code.data[cmp->pos]; if(c == '\n') completeLine(cmp); cmp->column++; diff --git a/src/compiler/Parser.c b/src/compiler/Parser.c index 7af60a2..f479579 100644 --- a/src/compiler/Parser.c +++ b/src/compiler/Parser.c @@ -6,15 +6,15 @@ } #define setError_unexpectedToken(T) {\ - char* tok_str = Compiler_extractTokenStr(cmp, T);\ + str tok_str = str_copy(Compiler_constructTokenStr(cmp, T));\ cmp->pos = T.begin;\ - Compiler_setError(cmp, "unexpected token '%s'", tok_str);\ - free(tok_str);\ + Compiler_setError(cmp, "unexpected token '%s'", tok_str.data);\ + free(tok_str.data);\ } #define setError_unexpectedTokenChar(T, I) {\ cmp->pos = T.begin + I;\ - Compiler_setError(cmp, "unexpected token '%c'", cmp->code[cmp->pos]);\ + Compiler_setError(cmp, "unexpected token '%c'", cmp->code.data[cmp->pos]);\ } @@ -30,62 +30,63 @@ static void List_u8_pushBytes(List_u8* l, void* value, u32 startIndex, u32 count static inline bool isVarSizeBits(u32 B) { return (B == 8 && B == 16 && B == 32 && B == 64); } -static NULLABLE(u8*) resolveEscapeSequences(Compiler* cmp, cstr src){ - u32 len = strlen(src); - List_u8 resolved = List_u8_alloc(len); +static NULLABLE(str) resolveEscapeSequences(Compiler* cmp, str src){ + StringBuilder sb = StringBuilder_alloc(src.len); char c; bool escaped = false; - for(u32 i = 0; i < len; i++){ - c = src[i]; + for(u32 i = 0; i < src.len; i++){ + c = src.data[i]; if(c == '\\'){ escaped = !escaped; continue; } if(!escaped){ - List_u8_push(&resolved, c); + StringBuilder_append_char(&sb, c); continue; } // escape codes switch(c){ case '0': - List_u8_push(&resolved, '\0'); + StringBuilder_append_char(&sb, '\0'); break; case 'n': - List_u8_push(&resolved, '\n'); + StringBuilder_append_char(&sb, '\n'); break; case 'r': - List_u8_push(&resolved, '\r'); + StringBuilder_append_char(&sb, '\r'); break; case 't': - List_u8_push(&resolved, '\t'); + StringBuilder_append_char(&sb, '\t'); break; case 'e': - List_u8_push(&resolved, '\e'); + StringBuilder_append_char(&sb, '\e'); break; default: setError_unexpectedTokenChar(cmp->tokens.data[cmp->tok_i], i); - free(resolved.data); - return NULL; + StringBuilder_free(&sb); + return str_null; } } - return resolved.data; + return StringBuilder_getStr(&sb); } -static void parseDataDefinition(Compiler* cmp, char* instr_name, DataDefinition* ddf){ +static void parseDataDefinition(Compiler* cmp, str instr_name, DataDefinition* ddf){ i32 _element_size_bits; - if(sscanf(instr_name, "const%i", &_element_size_bits) != 1 || !isVarSizeBits(_element_size_bits)){ + str _instr_name_zero_terminated = str_copy(instr_name); + if(sscanf(_instr_name_zero_terminated.data, "const%i", &_element_size_bits) != 1 || !isVarSizeBits(_element_size_bits)){ + free(_instr_name_zero_terminated.data); setError(Error_BitSize); return; } + free(_instr_name_zero_terminated.data); ddf->element_size = _element_size_bits / 8; Token tok = cmp->tokens.data[++cmp->tok_i]; - char* tok_str = Compiler_extractTokenStr(cmp, tok); - u8* processed_str = NULL; - u32 len = 0; + str tok_str = Compiler_constructTokenStr(cmp, tok); + str processed_str = str_null; ddf->name = tok_str; while(++cmp->tok_i < cmp->tokens.len){ @@ -98,37 +99,38 @@ static void parseDataDefinition(Compiler* cmp, char* instr_name, DataDefinition* // skip comments break; case TokenType_Number: - tok_str = Compiler_extractTokenStr(cmp, tok); - if(cstr_seekChar(tok_str, '.', 0, -1) != -1){ - f64 f = atof(tok_str); + tok_str = Compiler_constructTokenStr(cmp, tok); + processed_str = str_copy(tok_str); + if(str_seekChar(tok_str, '.', 0) != -1){ + f64 f = atof(tok_str.data); List_u8_pushBytes(&ddf->data, &f, 8 - ddf->element_size, ddf->element_size); } else { - i64 i = atoll(tok_str); + i64 i = atoll(tok_str.data); List_u8_pushBytes(&ddf->data, &i, 8 - ddf->element_size, ddf->element_size); } + free(processed_str.data); break; case TokenType_Char: tok.begin += 1; tok.length -= 2; - tok_str = Compiler_extractTokenStr(cmp, tok); + tok_str = Compiler_constructTokenStr(cmp, tok); processed_str = resolveEscapeSequences(cmp, tok_str); - free(tok_str); - len = strlen((char*)processed_str); - if(len != ddf->element_size){ - setError("can't fit char of size %i in %u bit variable", len, _element_size_bits); + + if(processed_str.len != ddf->element_size){ + setError("can't fit char of size %i in %u bit variable", processed_str.len, _element_size_bits); return; } - List_u8_pushBytes(&ddf->data, processed_str, 0, len); + List_u8_pushBytes(&ddf->data, processed_str.data, 0, processed_str.len); + free(processed_str.data); break; case TokenType_String: tok.begin += 1; tok.length -= 2; - tok_str = Compiler_extractTokenStr(cmp, tok); + tok_str = Compiler_constructTokenStr(cmp, tok); processed_str = resolveEscapeSequences(cmp, tok_str); - free(tok_str); - len = strlen((char*)processed_str); - List_u8_pushBytes(&ddf->data, processed_str, 0, len); + List_u8_pushBytes(&ddf->data, processed_str.data, 0, processed_str.len); + free(processed_str.data); break; case TokenType_OperationEnd: return; @@ -140,7 +142,7 @@ static void parseDataDefinition(Compiler* cmp, char* instr_name, DataDefinition* } -static void parseOperation(Compiler* cmp, char* instr_name, Operation* operPtr){ +static void parseOperation(Compiler* cmp, str instr_name, Operation* operPtr){ } @@ -162,14 +164,14 @@ bool Compiler_parse(Compiler* cmp){ case TokenType_Label: // create new section sec = List_Section_expand(&cmp->ast.sections, 1); - Section_init(sec, Compiler_extractTokenStr(cmp, tok)); + Section_init(sec, Compiler_constructTokenStr(cmp, tok)); break; case TokenType_Instruction: if(sec == NULL) returnError("no section"); - char* instr_name = Compiler_extractTokenStr(cmp, tok); + str instr_name = Compiler_constructTokenStr(cmp, tok); // data definition starts with const - if(cstr_seek(instr_name, "const", 0, 1)){ + if(str_startsWith(instr_name, STR("const"))){ DataDefinition* dataDefPtr = List_DataDefinition_expand(&sec->data, 1); parseDataDefinition(cmp, instr_name, dataDefPtr); } diff --git a/src/compiler/Token.c b/src/compiler/Token.c index cf62d87..fbf9b20 100644 --- a/src/compiler/Token.c +++ b/src/compiler/Token.c @@ -2,22 +2,22 @@ List_define(Token); -static cstr _TokenType_str[] = { - "Unset", - "SingleLineComment", - "MultiLineComment", - "Instruction", - "Label", - "Number", - "Char", - "String", - "Name", - "NamedDataPointer", - "NamedDataSize" +static str _TokenType_str[] = { + STR("Unset"), + STR("SingleLineComment"), + STR("MultiLineComment"), + STR("Instruction"), + STR("Label"), + STR("Number"), + STR("Char"), + STR("String"), + STR("Name"), + STR("NamedDataPointer"), + STR("NamedDataSize") }; -cstr TokenType_toString(TokenType t){ +str TokenType_toString(TokenType t){ if(t >= ARRAY_SIZE(_TokenType_str)) - return "!!INDEX_ERROR!!"; + return STR("!!INDEX_ERROR!!"); return _TokenType_str[t]; } diff --git a/src/compiler/Token.h b/src/compiler/Token.h index 082998b..3277ef3 100644 --- a/src/compiler/Token.h +++ b/src/compiler/Token.h @@ -1,5 +1,6 @@ #pragma once #include "../std.h" +#include "../string/str.h" #include "../collections/List.h" typedef enum TokenType { @@ -17,7 +18,7 @@ typedef enum TokenType { TokenType_OperationEnd, // EOL or EOF or ; } TokenType; -cstr TokenType_toString(TokenType t); +str TokenType_toString(TokenType t); typedef struct Token { u32 begin; // some index in Compiler->code diff --git a/src/instructions/instructions.h b/src/instructions/instructions.h index 1c8f054..08c5000 100644 --- a/src/instructions/instructions.h +++ b/src/instructions/instructions.h @@ -19,13 +19,13 @@ typedef enum __attribute__((__packed__)) Opcode { } Opcode; typedef struct Instruction { - cstr name; + str name; InstructionImplFunc_t implementation; Opcode opcode; } Instruction; #define Instruction_construct(NAME) {\ - .name = #NAME, \ + .name = STR(#NAME), \ .implementation = NAME##_impl, \ .opcode = Opcode_##NAME\ } diff --git a/src/main.c b/src/main.c index 8546ced..5e3b563 100644 --- a/src/main.c +++ b/src/main.c @@ -38,7 +38,7 @@ i32 main(const i32 argc, cstr* argv){ for(u8 opcode = 0; opcode < 255; opcode++){ const Instruction* instr = Instruction_getByOpcode(opcode); if(instr != NULL){ - printf("%02X %s\n", opcode, instr->name); + printf("%02X %s\n", opcode, instr->name.data); } } return 0;