List growth factor and padding

parseDataDefinition
2025-01-27 09:42:34 +05:00 · 2025-01-27 09:40:49 +05:00
9 changed files with 323 additions and 28 deletions
--- a/src/collections/List.h
+++ b/src/collections/List.h
@ -12,22 +12,26 @@
        return (List_##T){ .data = data_ptr, .len = len, .max_len = max_len };\
    }\
    \
-    static inline List_##T List_##T##_alloc(u32 len){\
+    List_##T List_##T##_alloc(u32 initial_len);\
        return List_##T##_construct((T*)(len > 0 ? malloc(len * sizeof(T)) : NULL), 0, 0);\
    }\
    \
    T* List_##T##_expand(List_##T* ptr);\
    void List_##T##_push(List_##T* ptr, T value);\
 #define List_define(T)\
    List_##T List_##T##_alloc(u32 initial_len){\
        if(initial_len == 0)\
            return List_##T##_construct((T*)NULL, 0, 0);\
        u32 max_Len = ALIGN_TO(initial_len, sizeof(void*)/sizeof(T));\
        /* branchless version of max(max_len, __List_min_size) */\
        max_len += (max_len < __List_min_size) * (__List_min_size - max_len);\
        return List_##T##_construct((T*)malloc(initial_len * sizeof(T)), 0, max_len);\
    }\
    \
    T* List_##T##_expand(List_##T* ptr){\
        if(ptr->len == ptr->max_len){\
-            u32 max_len = ptr->max_len * 1.5;\
+            ptr->max_len *= 2;\
-            max_len += __List_padding_in_sizeof_T(T);\
+            ptr->data = (T*)realloc(ptr->data, ptr->max_len * sizeof(T));\
            /* branchless version of max(max_len, __List_min_size) */\
            max_len += (max_len < __List_min_size) * (__List_min_size - max_len);\
            ptr->data = (T*)realloc(ptr->data, max_len * sizeof(T));\
            ptr->max_len = max_len;\
        }\
        return &ptr->data[ptr->len++];\
@ -40,11 +44,6 @@
 // smallest capacity (in elements) a non-empty list is grown to
 #define __List_min_size 16
 // number of extra elements of T that __List_padding_in_sizeof_T(T) evaluates to:
 // sizeof(T) == 1  - padding is 7 of sizeof(T)
 // sizeof(T) == 2  - padding is 3 of sizeof(T)
 // sizeof(T) == 4  - padding is 1 of sizeof(T)
 // NOTE(review): for sizeof(T) == 8 the formula gives 1 (not 0), for sizeof(T) > 8 it gives 0
 #define __List_padding_in_sizeof_T(T) ((8 - sizeof(T) % 8) / sizeof(T) )
 List_declare(cstr);
 List_declare(u32);
 List_declare(u8);
--- a/src/compiler/AST.h
+++ b/src/compiler/AST.h
@ -32,10 +32,9 @@ List_declare(Operation);
 /// One data definition filled in by parseDataDefinition:
 ///   element_size - size of one element in bytes (bit size from the `const<bits>` instruction / 8)
 ///   name         - string extracted from the token that follows the instruction
 ///   data         - raw bytes of all values listed in the definition
 /// NOTE(review): `count` is not written by the code visible here - presumably the number of elements, confirm.
 /// NOTE(review): `element_size` is listed twice in this view; it looks like the field was moved by this change.
 typedef struct DataDefinition {
    u32 element_size;
    u32 count;
    cstr name;
-    void* data;
+    List_u8 data;
    u32 element_size;
 } DataDefinition;
 List_declare(DataDefinition);
--- a/src/compiler/Lexer.c
+++ b/src/compiler/Lexer.c
@ -139,7 +139,7 @@ static void readArguments(Compiler* cmp){
            }
        }
-        // end of line
+        // end of operation
        else if(c == '\r' || c == '\n' || c == ';'){
            tok.length = cmp->pos - tok.begin;
            if(tok.length > 0)
@ -195,6 +195,8 @@ static void readInstruction(Compiler* cmp){
        if(c == '\r' || c == '\n' || c == ';'){
            tok.length = cmp->pos - tok.begin;
            List_Token_push(&cmp->tokens, tok);
            tok = Token_construct(TokenType_OperationEnd, cmp->pos, 1);
            List_Token_push(&cmp->tokens, tok);
            // cmp->line will be increased in lex()
            return;
        }
@ -204,6 +206,8 @@ static void readInstruction(Compiler* cmp){
            tok.length = cmp->pos - tok.begin;
            List_Token_push(&cmp->tokens, tok);
            readArguments(cmp);
            tok = Token_construct(TokenType_OperationEnd, cmp->pos, 1);
            List_Token_push(&cmp->tokens, tok);
            return;
        }
@ -219,6 +223,8 @@ static void readInstruction(Compiler* cmp){
    // end of file
    tok.length = cmp->pos - tok.begin;
    List_Token_push(&cmp->tokens, tok);
    tok = Token_construct(TokenType_OperationEnd, cmp->pos, 1);
    List_Token_push(&cmp->tokens, tok);
 }
 bool Compiler_lex(Compiler* cmp){
--- a/src/compiler/Parser.c
+++ b/src/compiler/Parser.c
@ -7,13 +7,136 @@
 /* Reports an "unexpected token" error for token T.
    Extracts the token text with Compiler_extractTokenStr, passes it to the error
    setter and frees the extracted string afterwards.
    Expects a `Compiler* cmp` variable in the calling scope. */
 #define setError_unexpectedToken(T) {\
    char* tok_str = Compiler_extractTokenStr(cmp, T);\
-    setError("unexpected character '%s'", tok_str);\
+    cmp->pos = T.begin;\
    Compiler_setError(cmp, "unexpected token '%s'", tok_str);\
    free(tok_str);\
 }
 /* Reports an "unexpected token" error for a single character of token T.
    T - token the character belongs to
    I - offset of the character from the beginning of the token
    Moves cmp->pos to that character and passes it to Compiler_setError.
    Expects a `Compiler* cmp` variable in the calling scope.
    Both arguments are parenthesized so that expressions can be passed safely. */
 #define setError_unexpectedTokenChar(T, I) {\
    cmp->pos = (T).begin + (I);\
    Compiler_setError(cmp, "unexpected token '%c'", cmp->code[cmp->pos]);\
 }
 static void parseDataDefinition(Compiler* cmp, char* instr_name, DataDefinition* dataDefPtr){
 #define Error_TokenUnset "token of undefined type"
 #define Error_BitSize "invalid size in bits"
 /// Appends `count` bytes taken from `value`, beginning at byte offset `startIndex`, to the list `l`.
 /// Bytes are pushed one by one in increasing address order.
 static void List_u8_pushBytes(List_u8* l, void* value, u32 startIndex, u32 count){
    u8* bytes = value;
    u32 pos = startIndex;
    while(pos < startIndex + count){
        List_u8_push(l, bytes[pos]);
        pos++;
    }
 }
 /// Returns true when B is one of the supported variable sizes in bits: 8, 16, 32 or 64.
 /// The alternatives are joined with `||`: a single value can never equal all four sizes at once.
 static inline bool isVarSizeBits(u32 B) { return (B == 8 || B == 16 || B == 32 || B == 64); }
 /// Replaces backslash escape sequences in `src` with the bytes they denote.
 /// Supported codes: \\ \0 \n \r \t \e
 /// @param cmp compiler state; receives the error when an unknown escape code is met
 /// @param src NUL-terminated text to process
 /// @return heap-allocated buffer holding the resolved bytes followed by a terminating '\0'
 ///         (the caller frees it), or NULL if an unknown escape code was found
 static NULLABLE(u8*) resolveEscapeSequences(Compiler* cmp, cstr src){
    u32 len = strlen(src);
    // one extra byte for the terminator, so the buffer is never empty
    List_u8 resolved = List_u8_alloc(len + 1);
    bool escaped = false;
    for(u32 i = 0; i < len; i++){
        char c = src[i];
        if(!escaped){
            if(c == '\\')
                escaped = true;
            else List_u8_push(&resolved, c);
            continue;
        }
        // a backslash affects only the character right after it
        escaped = false;
        // escape codes
        switch(c){
            case '\\':
                List_u8_push(&resolved, '\\');
                break;
            case '0':
                List_u8_push(&resolved, '\0');
                break;
            case 'n':
                List_u8_push(&resolved, '\n');
                break;
            case 'r':
                List_u8_push(&resolved, '\r');
                break;
            case 't':
                List_u8_push(&resolved, '\t');
                break;
            case 'e':
                // ESC character; '\e' itself is a compiler extension
                List_u8_push(&resolved, '\x1b');
                break;
            default:
                // NOTE(review): `i` is an offset inside `src`; if `src` was cut out of the token
                // without its opening quote, the reported position may be off by one - confirm
                setError_unexpectedTokenChar(cmp->tokens.data[cmp->tok_i], i);
                free(resolved.data);
                return NULL;
        }
    }
    // terminate the result: it is measured with strlen by the callers
    List_u8_push(&resolved, '\0');
    return resolved.data;
 }
 /// Parses a data definition: `const<bits> <name> <value> <value> ...` up to the end of operation.
 /// Numbers, chars and strings are appended to ddf->data as raw bytes.
 /// @param cmp compiler state; cmp->tok_i must point at the instruction token.
 ///            On return it points at the token that ended parsing.
 /// @param instr_name text of the instruction token, expected to be "const" followed by 8, 16, 32 or 64
 /// @param ddf definition to fill; takes ownership of the extracted name string
 /// On failure an error is set on `cmp` and the function returns early.
 static void parseDataDefinition(Compiler* cmp, char* instr_name, DataDefinition* ddf){
    i32 _element_size_bits;
    if(sscanf(instr_name, "const%i", &_element_size_bits) != 1 || !isVarSizeBits(_element_size_bits)){
        setError(Error_BitSize);
        return;
    }
    ddf->element_size = _element_size_bits / 8;
    // the token after the instruction is the name of the definition
    Token tok = cmp->tokens.data[++cmp->tok_i];
    char* tok_str = Compiler_extractTokenStr(cmp, tok);
    u8* processed_str = NULL;
    i32 len = 0;
    ddf->name = tok_str;
    while(++cmp->tok_i < cmp->tokens.len){
        // load the token of this iteration, otherwise the name token is examined forever
        tok = cmp->tokens.data[cmp->tok_i];
        switch(tok.type){
            case TokenType_Unset:
                setError(Error_TokenUnset);
                return;
            case TokenType_SingleLineComment:
            case TokenType_MultiLineComment:
                // skip comments
                break;
            case TokenType_Number:
                tok_str = Compiler_extractTokenStr(cmp, tok);
                // NOTE(review): the bytes are taken from offset (8 - element_size) of the 8-byte value,
                // which are the low-order bytes only on a big-endian host - confirm this is intended
                if(cstr_seekChar(tok_str, '.', 0, -1) != -1){
                    f64 f = atof(tok_str);
                    List_u8_pushBytes(&ddf->data, &f, 8 - ddf->element_size, ddf->element_size);
                }
                else {
                    i64 i = atoll(tok_str);
                    List_u8_pushBytes(&ddf->data, &i, 8 - ddf->element_size, ddf->element_size);
                }
                // the number text is not needed after conversion
                free(tok_str);
                break;
            case TokenType_Char:
                // cut off the quotes
                tok.begin += 1;
                tok.length -= 2;
                tok_str = Compiler_extractTokenStr(cmp, tok);
                processed_str = resolveEscapeSequences(cmp, tok_str);
                free(tok_str);
                // NULL means the error has already been set by resolveEscapeSequences
                if(processed_str == NULL)
                    return;
                // NOTE(review): strlen relies on the resolved buffer being NUL-terminated - confirm
                len = strlen((const char*)processed_str);
                if(len != ddf->element_size){
                    setError("can't fit char of size %i in %u bit variable", len, _element_size_bits);
                    free(processed_str);
                    return;
                }
                List_u8_pushBytes(&ddf->data, processed_str, 0, len);
                free(processed_str);
                break;
            case TokenType_String:
                // cut off the quotes
                tok.begin += 1;
                tok.length -= 2;
                tok_str = Compiler_extractTokenStr(cmp, tok);
                processed_str = resolveEscapeSequences(cmp, tok_str);
                free(tok_str);
                // NULL means the error has already been set by resolveEscapeSequences
                if(processed_str == NULL)
                    return;
                // NOTE(review): strlen relies on the resolved buffer being NUL-terminated - confirm
                len = strlen((const char*)processed_str);
                List_u8_pushBytes(&ddf->data, processed_str, 0, len);
                free(processed_str);
                break;
            case TokenType_OperationEnd:
                return;
            default:
                setError_unexpectedToken(tok);
                return;
        }
    }
 }
@ -31,7 +154,7 @@ bool Compiler_parse(Compiler* cmp){
        tok = cmp->tokens.data[cmp->tok_i];
        switch(tok.type){
            case TokenType_Unset:
-                returnError("token of undefined type");
+                returnError(Error_TokenUnset);
            case TokenType_SingleLineComment:
            case TokenType_MultiLineComment:
                // skip comments
@ -42,6 +165,8 @@ bool Compiler_parse(Compiler* cmp){
                Section_init(sec, Compiler_extractTokenStr(cmp, tok));
                break;
            case TokenType_Instruction:
                if(sec == NULL)
                    returnError("no section");
                char* instr_name = Compiler_extractTokenStr(cmp, tok);
                // data definition starts with const
                if(cstr_seek(instr_name, "const", 0, 1)){
--- a/src/compiler/Token.h
+++ b/src/compiler/Token.h
@ -13,7 +13,8 @@ typedef enum TokenType {
    TokenType_String,               // "aaaa"
    TokenType_Name,                 // xyz
    TokenType_NamedDataPointer,     // @xyz
-    TokenType_NamedDataSize         // #xyz
+    TokenType_NamedDataSize,        // #xyz
    TokenType_OperationEnd,         // EOL or EOF or ;
 } TokenType;
 cstr TokenType_toString(TokenType t);
@ -26,4 +27,4 @@ typedef struct Token {
 List_declare(Token);
-#define Token_construct(TYPE, BEGIN, END) ((Token){ .type = TYPE, .begin = BEGIN, .length = END })
+#define Token_construct(TYPE, BEGIN, LEN) ((Token){ .type = TYPE, .begin = BEGIN, .length = LEN })
--- a/src/compiler/parser.c
+++ b/src/compiler/parser.c
@ -7,13 +7,136 @@
 /* Reports an "unexpected token" error for token T.
    Extracts the token text with Compiler_extractTokenStr, passes it to the error
    setter and frees the extracted string afterwards.
    Expects a `Compiler* cmp` variable in the calling scope. */
 #define setError_unexpectedToken(T) {\
    char* tok_str = Compiler_extractTokenStr(cmp, T);\
-    setError("unexpected character '%s'", tok_str);\
+    cmp->pos = T.begin;\
    Compiler_setError(cmp, "unexpected token '%s'", tok_str);\
    free(tok_str);\
 }
 /* Reports an "unexpected token" error for a single character of token T.
    T - token the character belongs to
    I - offset of the character from the beginning of the token
    Moves cmp->pos to that character and passes it to Compiler_setError.
    Expects a `Compiler* cmp` variable in the calling scope.
    Both arguments are parenthesized so that expressions can be passed safely. */
 #define setError_unexpectedTokenChar(T, I) {\
    cmp->pos = (T).begin + (I);\
    Compiler_setError(cmp, "unexpected token '%c'", cmp->code[cmp->pos]);\
 }
 static void parseDataDefinition(Compiler* cmp, char* instr_name, DataDefinition* dataDefPtr){
 #define Error_TokenUnset "token of undefined type"
 #define Error_BitSize "invalid size in bits"
 /// Appends `count` bytes taken from `value`, beginning at byte offset `startIndex`, to the list `l`.
 /// Bytes are pushed one by one in increasing address order.
 static void List_u8_pushBytes(List_u8* l, void* value, u32 startIndex, u32 count){
    u8* bytes = value;
    u32 pos = startIndex;
    while(pos < startIndex + count){
        List_u8_push(l, bytes[pos]);
        pos++;
    }
 }
 /// Returns true when B is one of the supported variable sizes in bits: 8, 16, 32 or 64.
 /// The alternatives are joined with `||`: a single value can never equal all four sizes at once.
 static inline bool isVarSizeBits(u32 B) { return (B == 8 || B == 16 || B == 32 || B == 64); }
 /// Replaces backslash escape sequences in `src` with the bytes they denote.
 /// Supported codes: \\ \0 \n \r \t \e
 /// @param cmp compiler state; receives the error when an unknown escape code is met
 /// @param src NUL-terminated text to process
 /// @return heap-allocated buffer holding the resolved bytes followed by a terminating '\0'
 ///         (the caller frees it), or NULL if an unknown escape code was found
 static NULLABLE(u8*) resolveEscapeSequences(Compiler* cmp, cstr src){
    u32 len = strlen(src);
    // one extra byte for the terminator, so the buffer is never empty
    List_u8 resolved = List_u8_alloc(len + 1);
    bool escaped = false;
    for(u32 i = 0; i < len; i++){
        char c = src[i];
        if(!escaped){
            if(c == '\\')
                escaped = true;
            else List_u8_push(&resolved, c);
            continue;
        }
        // a backslash affects only the character right after it
        escaped = false;
        // escape codes
        switch(c){
            case '\\':
                List_u8_push(&resolved, '\\');
                break;
            case '0':
                List_u8_push(&resolved, '\0');
                break;
            case 'n':
                List_u8_push(&resolved, '\n');
                break;
            case 'r':
                List_u8_push(&resolved, '\r');
                break;
            case 't':
                List_u8_push(&resolved, '\t');
                break;
            case 'e':
                // ESC character; '\e' itself is a compiler extension
                List_u8_push(&resolved, '\x1b');
                break;
            default:
                // NOTE(review): `i` is an offset inside `src`; if `src` was cut out of the token
                // without its opening quote, the reported position may be off by one - confirm
                setError_unexpectedTokenChar(cmp->tokens.data[cmp->tok_i], i);
                free(resolved.data);
                return NULL;
        }
    }
    // terminate the result: it is measured with strlen by the callers
    List_u8_push(&resolved, '\0');
    return resolved.data;
 }
 /// Parses a data definition: `const<bits> <name> <value> <value> ...` up to the end of operation.
 /// Numbers, chars and strings are appended to ddf->data as raw bytes.
 /// @param cmp compiler state; cmp->tok_i must point at the instruction token.
 ///            On return it points at the token that ended parsing.
 /// @param instr_name text of the instruction token, expected to be "const" followed by 8, 16, 32 or 64
 /// @param ddf definition to fill; takes ownership of the extracted name string
 /// On failure an error is set on `cmp` and the function returns early.
 static void parseDataDefinition(Compiler* cmp, char* instr_name, DataDefinition* ddf){
    i32 _element_size_bits;
    if(sscanf(instr_name, "const%i", &_element_size_bits) != 1 || !isVarSizeBits(_element_size_bits)){
        setError(Error_BitSize);
        return;
    }
    ddf->element_size = _element_size_bits / 8;
    // the token after the instruction is the name of the definition
    Token tok = cmp->tokens.data[++cmp->tok_i];
    char* tok_str = Compiler_extractTokenStr(cmp, tok);
    u8* processed_str = NULL;
    i32 len = 0;
    ddf->name = tok_str;
    while(++cmp->tok_i < cmp->tokens.len){
        // load the token of this iteration, otherwise the name token is examined forever
        tok = cmp->tokens.data[cmp->tok_i];
        switch(tok.type){
            case TokenType_Unset:
                setError(Error_TokenUnset);
                return;
            case TokenType_SingleLineComment:
            case TokenType_MultiLineComment:
                // skip comments
                break;
            case TokenType_Number:
                tok_str = Compiler_extractTokenStr(cmp, tok);
                // NOTE(review): the bytes are taken from offset (8 - element_size) of the 8-byte value,
                // which are the low-order bytes only on a big-endian host - confirm this is intended
                if(cstr_seekChar(tok_str, '.', 0, -1) != -1){
                    f64 f = atof(tok_str);
                    List_u8_pushBytes(&ddf->data, &f, 8 - ddf->element_size, ddf->element_size);
                }
                else {
                    i64 i = atoll(tok_str);
                    List_u8_pushBytes(&ddf->data, &i, 8 - ddf->element_size, ddf->element_size);
                }
                // the number text is not needed after conversion
                free(tok_str);
                break;
            case TokenType_Char:
                // cut off the quotes
                tok.begin += 1;
                tok.length -= 2;
                tok_str = Compiler_extractTokenStr(cmp, tok);
                processed_str = resolveEscapeSequences(cmp, tok_str);
                free(tok_str);
                // NULL means the error has already been set by resolveEscapeSequences
                if(processed_str == NULL)
                    return;
                // NOTE(review): strlen relies on the resolved buffer being NUL-terminated - confirm
                len = strlen((const char*)processed_str);
                if(len != ddf->element_size){
                    setError("can't fit char of size %i in %u bit variable", len, _element_size_bits);
                    free(processed_str);
                    return;
                }
                List_u8_pushBytes(&ddf->data, processed_str, 0, len);
                free(processed_str);
                break;
            case TokenType_String:
                // cut off the quotes
                tok.begin += 1;
                tok.length -= 2;
                tok_str = Compiler_extractTokenStr(cmp, tok);
                processed_str = resolveEscapeSequences(cmp, tok_str);
                free(tok_str);
                // NULL means the error has already been set by resolveEscapeSequences
                if(processed_str == NULL)
                    return;
                // NOTE(review): strlen relies on the resolved buffer being NUL-terminated - confirm
                len = strlen((const char*)processed_str);
                List_u8_pushBytes(&ddf->data, processed_str, 0, len);
                free(processed_str);
                break;
            case TokenType_OperationEnd:
                return;
            default:
                setError_unexpectedToken(tok);
                return;
        }
    }
 }
@ -31,7 +154,7 @@ bool Compiler_parse(Compiler* cmp){
        tok = cmp->tokens.data[cmp->tok_i];
        switch(tok.type){
            case TokenType_Unset:
-                returnError("token of undefined type");
+                returnError(Error_TokenUnset);
            case TokenType_SingleLineComment:
            case TokenType_MultiLineComment:
                // skip comments
@ -42,6 +165,8 @@ bool Compiler_parse(Compiler* cmp){
                Section_init(sec, Compiler_extractTokenStr(cmp, tok));
                break;
            case TokenType_Instruction:
                if(sec == NULL)
                    returnError("no section");
                char* instr_name = Compiler_extractTokenStr(cmp, tok);
                // data definition starts with const
                if(cstr_seek(instr_name, "const", 0, 1)){
--- a/src/compiler/token.h
+++ b/src/compiler/token.h
@ -13,7 +13,8 @@ typedef enum TokenType {
    TokenType_String,               // "aaaa"
    TokenType_Name,                 // xyz
    TokenType_NamedDataPointer,     // @xyz
-    TokenType_NamedDataSize         // #xyz
+    TokenType_NamedDataSize,        // #xyz
    TokenType_OperationEnd,         // EOL or EOF or ;
 } TokenType;
 cstr TokenType_toString(TokenType t);
@ -26,4 +27,4 @@ typedef struct Token {
 List_declare(Token);
-#define Token_construct(TYPE, BEGIN, END) ((Token){ .type = TYPE, .begin = BEGIN, .length = END })
+#define Token_construct(TYPE, BEGIN, LEN) ((Token){ .type = TYPE, .begin = BEGIN, .length = LEN })
--- a/src/cstr.c
+++ b/src/cstr.c
@ -89,4 +89,32 @@ i32 cstr_seekReverse(const char* src, const char* fragment, u32 startIndex, u32
        }
    }
    return -1;
-}
+}
 /// Looks for the first occurrence of `fragment` in `src`, scanning forward from `startIndex`.
 /// At most `seekLength` characters are examined; the scan also ends at the string terminator.
 /// Returns the index of the match, or -1 if `src` is empty, `fragment` is '\0' or nothing was found.
 i32 cstr_seekChar(const char* src, char fragment, u32 startIndex, u32 seekLength){
    if(*src=='\0' || fragment=='\0')
        return -1;
    char current=*src;
    u32 offset=0;
    while(offset<seekLength && current!='\0'){
        u32 pos=startIndex+offset;
        current=src[pos];
        if(current==fragment)
            return pos;
        offset++;
    }
    return -1;
 }
 /// Looks for `fragment` in `src`, scanning backwards from `startIndex` towards the beginning.
 /// @param startIndex index of the first character to examine, or (u32)-1 to start from the last character
 /// @param seekLength maximum number of characters to examine, counted from `startIndex`
 /// @return index of the first match met while going backwards,
 ///         or -1 if `src` is empty, `fragment` is '\0' or nothing was found
 i32 cstr_seekCharReverse(const char* src, char fragment, u32 startIndex, u32 seekLength){
    if(*src==0 || fragment==0)
        return -1;
    if(startIndex==(u32)-1)
        startIndex=strlen(src)-1;
    // `checked` counts examined characters, so seekLength is measured from startIndex
    // even when startIndex is not the last character.
    // si wraps to (u32)-1 after index 0, which ends the scan at the beginning of the string
    u32 checked=0;
    for(u32 si=startIndex; si!=(u32)-1 && checked<seekLength; si--){
        if(src[si]==fragment)
            return si;
        checked++;
    }
    return -1;
 }
--- a/src/std.h
+++ b/src/std.h
@ -27,6 +27,7 @@ typedef u8 bool;
 typedef const char* cstr;
 /// Number of elements in array A. Gives a meaningful result only for a real array, not for a pointer.
 /// The whole expansion and the argument are parenthesized, so the macro can be used inside larger expressions.
 #define ARRAY_SIZE(A) (sizeof(A)/sizeof((A)[0]))
 /// Rounds _SIZE up to a multiple of _ALIGN by adding (_ALIGN - 1) and masking off the low bits.
 /// The mask trick gives a multiple of _ALIGN only when _ALIGN is a power of two.
 #define ALIGN_TO(_SIZE,_ALIGN)        (((_SIZE) + ((_ALIGN) - 1)) & ~((_ALIGN) - 1))
 #define __count_args( \
    a0, a1, a2, a3, a4, a5, a6, a7 , a8, a9, a10,a11,a12,a13,a14,a15, \
@ -67,3 +68,13 @@ i32 cstr_seek(const char* src, const char* fragment, u32 startIndex, u32 seekLen
 /// @param seekLength 0 ... -1
 /// @return pos of first <fragment> inclusion in <src> or -1 if not found
 i32 cstr_seekReverse(const char* src, const char* fragment, u32 startIndex, u32 seekLength);
 /// @param startIndex 0 ... src length
 /// @param seekLength 0 ... -1
 /// @return pos of first <fragment> inclusion in <src> or -1 if not found
 i32 cstr_seekChar(const char* src, char fragment, u32 startIndex, u32 seekLength);
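 /// @param startIndex index to start seeking backwards from (0 ... src length), or -1 to start from the last character of <src>
 /// @param seekLength 0 ... -1
 /// @return pos of last <fragment> inclusion in <src> at or before <startIndex>, or -1 if not found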
 i32 cstr_seekCharReverse(const char* src, char fragment, u32 startIndex, u32 seekLength);
Author	SHA1	Message	Date
Timerix	7f606dfaff	List growth factor and padding	2025-01-27 09:42:34 +05:00
Timerix	46e5eb1887	parseDataDefinition	2025-01-27 09:40:49 +05:00