Compare commits

..

2 Commits

Author SHA1 Message Date
7f606dfaff List growth factor and padding 2025-01-27 09:42:34 +05:00
46e5eb1887 parseDataDefinition 2025-01-27 09:40:49 +05:00
9 changed files with 323 additions and 28 deletions

View File

@ -12,22 +12,26 @@
return (List_##T){ .data = data_ptr, .len = len, .max_len = max_len };\ return (List_##T){ .data = data_ptr, .len = len, .max_len = max_len };\
}\ }\
\ \
static inline List_##T List_##T##_alloc(u32 len){\ List_##T List_##T##_alloc(u32 initial_len);\
return List_##T##_construct((T*)(len > 0 ? malloc(len * sizeof(T)) : NULL), 0, 0);\
}\
\ \
T* List_##T##_expand(List_##T* ptr);\ T* List_##T##_expand(List_##T* ptr);\
void List_##T##_push(List_##T* ptr, T value);\ void List_##T##_push(List_##T* ptr, T value);\
#define List_define(T)\ #define List_define(T)\
List_##T List_##T##_alloc(u32 initial_len){\
    /* Creates an empty list (len == 0) with room for at least initial_len elements. */\
    /* initial_len == 0 allocates nothing: data is NULL and the capacity is 0. */\
    if(initial_len == 0)\
        return List_##T##_construct((T*)NULL, 0, 0);\
    /* Round the capacity up to a whole number of pointer-sized words. */\
    /* The ratio is 0 when T is wider than a pointer; use 1 so the ALIGN_TO mask stays valid. */\
    u32 elems_per_word = (u32)(sizeof(void*) / sizeof(T));\
    if(elems_per_word == 0)\
        elems_per_word = 1;\
    u32 max_len = ALIGN_TO(initial_len, elems_per_word);\
    /* branchless version of max(max_len, __List_min_size) */\
    max_len += (max_len < __List_min_size) * (__List_min_size - max_len);\
    /* Allocate the whole recorded capacity so pushes up to max_len stay inside the buffer. */\
    return List_##T##_construct((T*)malloc(max_len * sizeof(T)), 0, max_len);\
}\
\
T* List_##T##_expand(List_##T* ptr){\ T* List_##T##_expand(List_##T* ptr){\
if(ptr->len == ptr->max_len){\ if(ptr->len == ptr->max_len){\
u32 max_len = ptr->max_len * 1.5;\ ptr->max_len *= 2;\
max_len += __List_padding_in_sizeof_T(T);\ ptr->data = (T*)realloc(ptr->data, ptr->max_len * sizeof(T));\
/* branchless version of max(max_len, __List_min_size) */\
max_len += (max_len < __List_min_size) * (__List_min_size - max_len);\
ptr->data = (T*)realloc(ptr->data, max_len * sizeof(T));\
ptr->max_len = max_len;\ ptr->max_len = max_len;\
}\ }\
return &ptr->data[ptr->len++];\ return &ptr->data[ptr->len++];\
@ -40,11 +44,6 @@
#define __List_min_size 16 #define __List_min_size 16
// sizeof(T) == 1 - padding is 7 of sizeof(T)
// sizeof(T) == 2 - padding is 3 of sizeof(T)
// sizeof(T) == 4 - padding is 1 of sizeof(T)
#define __List_padding_in_sizeof_T(T) ((8 - sizeof(T) % 8) / sizeof(T) )
List_declare(cstr); List_declare(cstr);
List_declare(u32); List_declare(u32);
List_declare(u8); List_declare(u8);

View File

@ -32,10 +32,9 @@ List_declare(Operation);
typedef struct DataDefinition { typedef struct DataDefinition {
u32 element_size;
u32 count;
cstr name; cstr name;
void* data; List_u8 data;
u32 element_size;
} DataDefinition; } DataDefinition;
List_declare(DataDefinition); List_declare(DataDefinition);

View File

@ -139,7 +139,7 @@ static void readArguments(Compiler* cmp){
} }
} }
// end of line // end of operation
else if(c == '\r' || c == '\n' || c == ';'){ else if(c == '\r' || c == '\n' || c == ';'){
tok.length = cmp->pos - tok.begin; tok.length = cmp->pos - tok.begin;
if(tok.length > 0) if(tok.length > 0)
@ -195,6 +195,8 @@ static void readInstruction(Compiler* cmp){
if(c == '\r' || c == '\n' || c == ';'){ if(c == '\r' || c == '\n' || c == ';'){
tok.length = cmp->pos - tok.begin; tok.length = cmp->pos - tok.begin;
List_Token_push(&cmp->tokens, tok); List_Token_push(&cmp->tokens, tok);
tok = Token_construct(TokenType_OperationEnd, cmp->pos, 1);
List_Token_push(&cmp->tokens, tok);
// cmp->line will be increased in lex() // cmp->line will be increased in lex()
return; return;
} }
@ -204,6 +206,8 @@ static void readInstruction(Compiler* cmp){
tok.length = cmp->pos - tok.begin; tok.length = cmp->pos - tok.begin;
List_Token_push(&cmp->tokens, tok); List_Token_push(&cmp->tokens, tok);
readArguments(cmp); readArguments(cmp);
tok = Token_construct(TokenType_OperationEnd, cmp->pos, 1);
List_Token_push(&cmp->tokens, tok);
return; return;
} }
@ -219,6 +223,8 @@ static void readInstruction(Compiler* cmp){
// end of file // end of file
tok.length = cmp->pos - tok.begin; tok.length = cmp->pos - tok.begin;
List_Token_push(&cmp->tokens, tok); List_Token_push(&cmp->tokens, tok);
tok = Token_construct(TokenType_OperationEnd, cmp->pos, 1);
List_Token_push(&cmp->tokens, tok);
} }
bool Compiler_lex(Compiler* cmp){ bool Compiler_lex(Compiler* cmp){

View File

@ -7,13 +7,136 @@
#define setError_unexpectedToken(T) {\ #define setError_unexpectedToken(T) {\
char* tok_str = Compiler_extractTokenStr(cmp, T);\ char* tok_str = Compiler_extractTokenStr(cmp, T);\
setError("unexpected character '%s'", tok_str);\ cmp->pos = T.begin;\
Compiler_setError(cmp, "unexpected token '%s'", tok_str);\
free(tok_str);\ free(tok_str);\
} }
// Reports an "unexpected token" error for the single character at offset I inside token T.
// cmp->pos is moved to that character before Compiler_setError is called.
// Expects a variable named `cmp` to be visible where the macro is used.
// T and I are parenthesized so that arbitrary expressions can be passed safely.
#define setError_unexpectedTokenChar(T, I) {\
    cmp->pos = (T).begin + (I);\
    Compiler_setError(cmp, "unexpected token '%c'", cmp->code[cmp->pos]);\
}
static void parseDataDefinition(Compiler* cmp, char* instr_name, DataDefinition* dataDefPtr){
// Error text used when a token of type TokenType_Unset is encountered.
#define Error_TokenUnset "token of undefined type"
// Error text used when the bit width of a const<N> instruction is missing or unsupported.
#define Error_BitSize "invalid size in bits"
// Appends `count` bytes of the object at `value`, beginning at byte offset
// `startIndex`, to the list `l`. Bytes are pushed one at a time in ascending order.
static void List_u8_pushBytes(List_u8* l, void* value, u32 startIndex, u32 count){
    u8* bytes = value;
    const u32 end = startIndex + count;
    u32 pos = startIndex;
    while(pos < end){
        List_u8_push(l, bytes[pos]);
        pos++;
    }
}
// Returns true when B is a supported variable width in bits: 8, 16, 32 or 64.
static inline bool isVarSizeBits(u32 B) { return (B == 8 || B == 16 || B == 32 || B == 64); }
// Replaces backslash escape sequences in `src` with the bytes they stand for.
// Supported escapes: \\ \0 \n \r \t \e.
// Any other escaped character, or a backslash at the very end of `src`, is reported
// through setError_unexpectedTokenChar and NULL is returned.
// On success returns a heap buffer that the caller must free(). One extra '\0' is
// appended after the resolved bytes so the result can be measured with strlen()
// (strlen stops early if the text itself contained a \0 escape).
static NULLABLE(u8*) resolveEscapeSequences(Compiler* cmp, cstr src){
    u32 len = strlen(src);
    // +1 reserves room for the terminator, so an empty string still gets a buffer
    List_u8 resolved = List_u8_alloc(len + 1);
    bool escaped = false;
    for(u32 i = 0; i < len; i++){
        char c = src[i];
        if(!escaped){
            if(c == '\\')
                escaped = true;
            else List_u8_push(&resolved, c);
            continue;
        }

        // an escape covers exactly one character, so clear the flag before decoding it
        escaped = false;
        switch(c){
            case '\\':
                List_u8_push(&resolved, '\\');
                break;
            case '0':
                List_u8_push(&resolved, '\0');
                break;
            case 'n':
                List_u8_push(&resolved, '\n');
                break;
            case 'r':
                List_u8_push(&resolved, '\r');
                break;
            case 't':
                List_u8_push(&resolved, '\t');
                break;
            case 'e':
                // ESC; written as a number because '\e' is a compiler extension
                List_u8_push(&resolved, 0x1B);
                break;
            default:
                setError_unexpectedTokenChar(cmp->tokens.data[cmp->tok_i], i);
                free(resolved.data);
                return NULL;
        }
    }

    if(escaped){
        // a lone backslash at the end has nothing to escape
        setError_unexpectedTokenChar(cmp->tokens.data[cmp->tok_i], len - 1);
        free(resolved.data);
        return NULL;
    }

    List_u8_push(&resolved, '\0');
    return resolved.data;
}
// Parses a data definition: a `const<bits>` instruction followed by a name token and
// value tokens, up to the TokenType_OperationEnd token.
// instr_name is the instruction text; <bits> must be 8, 16, 32 or 64.
// Writes ddf->element_size (in bytes) and ddf->name, and appends the value bytes to ddf->data.
// The string stored in ddf->name is not freed here.
// On failure an error is set through setError / setError_unexpectedToken and parsing stops.
// NOTE(review): ddf->data is appended to as-is; presumably the caller initialises it - confirm.
static void parseDataDefinition(Compiler* cmp, char* instr_name, DataDefinition* ddf){
    i32 _element_size_bits;
    if(sscanf(instr_name, "const%i", &_element_size_bits) != 1 || !isVarSizeBits(_element_size_bits)){
        setError(Error_BitSize);
        return;
    }
    ddf->element_size = _element_size_bits / 8;

    // the token right after the instruction holds the definition name
    Token tok = cmp->tokens.data[++cmp->tok_i];
    ddf->name = Compiler_extractTokenStr(cmp, tok);

    while(++cmp->tok_i < cmp->tokens.len){
        // load the token being examined; otherwise every iteration re-reads the name token
        tok = cmp->tokens.data[cmp->tok_i];
        switch(tok.type){
            case TokenType_Unset:
                setError(Error_TokenUnset);
                return;
            case TokenType_SingleLineComment:
            case TokenType_MultiLineComment:
                // skip comments
                break;
            case TokenType_Number: {
                char* num_str = Compiler_extractTokenStr(cmp, tok);
                // NOTE(review): the bytes are taken from the tail of the 8-byte value, which is
                // the low-order part only on big-endian hosts - confirm the intended byte order.
                if(cstr_seekChar(num_str, '.', 0, -1) != -1){
                    f64 f = atof(num_str);
                    List_u8_pushBytes(&ddf->data, &f, 8 - ddf->element_size, ddf->element_size);
                }
                else {
                    i64 i = atoll(num_str);
                    List_u8_pushBytes(&ddf->data, &i, 8 - ddf->element_size, ddf->element_size);
                }
                free(num_str);
                break;
            }
            case TokenType_Char:
            case TokenType_String: {
                bool is_char = tok.type == TokenType_Char;
                // drop the surrounding quotes
                tok.begin += 1;
                tok.length -= 2;
                char* raw = Compiler_extractTokenStr(cmp, tok);
                // empty text has nothing to resolve; handling it here keeps a NULL result
                // from resolveEscapeSequences unambiguous (it then always means an error)
                bool is_empty = raw[0] == '\0';
                u8* processed = is_empty ? NULL : resolveEscapeSequences(cmp, raw);
                free(raw);
                if(!is_empty && processed == NULL)
                    return; // the error was already set by resolveEscapeSequences
                // NOTE(review): strlen relies on resolveEscapeSequences returning a
                // NUL-terminated buffer - confirm.
                i32 len = is_empty ? 0 : (i32)strlen((char*)processed);
                if(is_char && (u32)len != ddf->element_size){
                    setError("can't fit char of size %i in %i bit variable", len, _element_size_bits);
                    free(processed);
                    return;
                }
                if(len > 0)
                    List_u8_pushBytes(&ddf->data, processed, 0, len);
                free(processed);
                break;
            }
            case TokenType_OperationEnd:
                return;
            default:
                setError_unexpectedToken(tok);
                return;
        }
    }
}
@ -31,7 +154,7 @@ bool Compiler_parse(Compiler* cmp){
tok = cmp->tokens.data[cmp->tok_i]; tok = cmp->tokens.data[cmp->tok_i];
switch(tok.type){ switch(tok.type){
case TokenType_Unset: case TokenType_Unset:
returnError("token of undefined type"); returnError(Error_TokenUnset);
case TokenType_SingleLineComment: case TokenType_SingleLineComment:
case TokenType_MultiLineComment: case TokenType_MultiLineComment:
// skip comments // skip comments
@ -42,6 +165,8 @@ bool Compiler_parse(Compiler* cmp){
Section_init(sec, Compiler_extractTokenStr(cmp, tok)); Section_init(sec, Compiler_extractTokenStr(cmp, tok));
break; break;
case TokenType_Instruction: case TokenType_Instruction:
if(sec == NULL)
returnError("no section");
char* instr_name = Compiler_extractTokenStr(cmp, tok); char* instr_name = Compiler_extractTokenStr(cmp, tok);
// data definition starts with const // data definition starts with const
if(cstr_seek(instr_name, "const", 0, 1)){ if(cstr_seek(instr_name, "const", 0, 1)){

View File

@ -13,7 +13,8 @@ typedef enum TokenType {
TokenType_String, // "aaaa" TokenType_String, // "aaaa"
TokenType_Name, // xyz TokenType_Name, // xyz
TokenType_NamedDataPointer, // @xyz TokenType_NamedDataPointer, // @xyz
TokenType_NamedDataSize // #xyz TokenType_NamedDataSize, // #xyz
TokenType_OperationEnd, // EOL or EOF or ;
} TokenType; } TokenType;
cstr TokenType_toString(TokenType t); cstr TokenType_toString(TokenType t);
@ -26,4 +27,4 @@ typedef struct Token {
List_declare(Token); List_declare(Token);
#define Token_construct(TYPE, BEGIN, END) ((Token){ .type = TYPE, .begin = BEGIN, .length = END }) #define Token_construct(TYPE, BEGIN, LEN) ((Token){ .type = TYPE, .begin = BEGIN, .length = LEN })

View File

@ -7,13 +7,136 @@
#define setError_unexpectedToken(T) {\ #define setError_unexpectedToken(T) {\
char* tok_str = Compiler_extractTokenStr(cmp, T);\ char* tok_str = Compiler_extractTokenStr(cmp, T);\
setError("unexpected character '%s'", tok_str);\ cmp->pos = T.begin;\
Compiler_setError(cmp, "unexpected token '%s'", tok_str);\
free(tok_str);\ free(tok_str);\
} }
// Reports an "unexpected token" error for the single character at offset I inside token T.
// cmp->pos is moved to that character before Compiler_setError is called.
// Expects a variable named `cmp` to be visible where the macro is used.
// T and I are parenthesized so that arbitrary expressions can be passed safely.
#define setError_unexpectedTokenChar(T, I) {\
    cmp->pos = (T).begin + (I);\
    Compiler_setError(cmp, "unexpected token '%c'", cmp->code[cmp->pos]);\
}
static void parseDataDefinition(Compiler* cmp, char* instr_name, DataDefinition* dataDefPtr){
// Error text used when a token of type TokenType_Unset is encountered.
#define Error_TokenUnset "token of undefined type"
// Error text used when the bit width of a const<N> instruction is missing or unsupported.
#define Error_BitSize "invalid size in bits"
// Appends `count` bytes of the object at `value`, beginning at byte offset
// `startIndex`, to the list `l`. Bytes are pushed one at a time in ascending order.
static void List_u8_pushBytes(List_u8* l, void* value, u32 startIndex, u32 count){
    u8* bytes = value;
    const u32 end = startIndex + count;
    u32 pos = startIndex;
    while(pos < end){
        List_u8_push(l, bytes[pos]);
        pos++;
    }
}
// Returns true when B is a supported variable width in bits: 8, 16, 32 or 64.
static inline bool isVarSizeBits(u32 B) { return (B == 8 || B == 16 || B == 32 || B == 64); }
// Replaces backslash escape sequences in `src` with the bytes they stand for.
// Supported escapes: \\ \0 \n \r \t \e.
// Any other escaped character, or a backslash at the very end of `src`, is reported
// through setError_unexpectedTokenChar and NULL is returned.
// On success returns a heap buffer that the caller must free(). One extra '\0' is
// appended after the resolved bytes so the result can be measured with strlen()
// (strlen stops early if the text itself contained a \0 escape).
static NULLABLE(u8*) resolveEscapeSequences(Compiler* cmp, cstr src){
    u32 len = strlen(src);
    // +1 reserves room for the terminator, so an empty string still gets a buffer
    List_u8 resolved = List_u8_alloc(len + 1);
    bool escaped = false;
    for(u32 i = 0; i < len; i++){
        char c = src[i];
        if(!escaped){
            if(c == '\\')
                escaped = true;
            else List_u8_push(&resolved, c);
            continue;
        }

        // an escape covers exactly one character, so clear the flag before decoding it
        escaped = false;
        switch(c){
            case '\\':
                List_u8_push(&resolved, '\\');
                break;
            case '0':
                List_u8_push(&resolved, '\0');
                break;
            case 'n':
                List_u8_push(&resolved, '\n');
                break;
            case 'r':
                List_u8_push(&resolved, '\r');
                break;
            case 't':
                List_u8_push(&resolved, '\t');
                break;
            case 'e':
                // ESC; written as a number because '\e' is a compiler extension
                List_u8_push(&resolved, 0x1B);
                break;
            default:
                setError_unexpectedTokenChar(cmp->tokens.data[cmp->tok_i], i);
                free(resolved.data);
                return NULL;
        }
    }

    if(escaped){
        // a lone backslash at the end has nothing to escape
        setError_unexpectedTokenChar(cmp->tokens.data[cmp->tok_i], len - 1);
        free(resolved.data);
        return NULL;
    }

    List_u8_push(&resolved, '\0');
    return resolved.data;
}
// Parses a data definition: a `const<bits>` instruction followed by a name token and
// value tokens, up to the TokenType_OperationEnd token.
// instr_name is the instruction text; <bits> must be 8, 16, 32 or 64.
// Writes ddf->element_size (in bytes) and ddf->name, and appends the value bytes to ddf->data.
// The string stored in ddf->name is not freed here.
// On failure an error is set through setError / setError_unexpectedToken and parsing stops.
// NOTE(review): ddf->data is appended to as-is; presumably the caller initialises it - confirm.
static void parseDataDefinition(Compiler* cmp, char* instr_name, DataDefinition* ddf){
    i32 _element_size_bits;
    if(sscanf(instr_name, "const%i", &_element_size_bits) != 1 || !isVarSizeBits(_element_size_bits)){
        setError(Error_BitSize);
        return;
    }
    ddf->element_size = _element_size_bits / 8;

    // the token right after the instruction holds the definition name
    Token tok = cmp->tokens.data[++cmp->tok_i];
    ddf->name = Compiler_extractTokenStr(cmp, tok);

    while(++cmp->tok_i < cmp->tokens.len){
        // load the token being examined; otherwise every iteration re-reads the name token
        tok = cmp->tokens.data[cmp->tok_i];
        switch(tok.type){
            case TokenType_Unset:
                setError(Error_TokenUnset);
                return;
            case TokenType_SingleLineComment:
            case TokenType_MultiLineComment:
                // skip comments
                break;
            case TokenType_Number: {
                char* num_str = Compiler_extractTokenStr(cmp, tok);
                // NOTE(review): the bytes are taken from the tail of the 8-byte value, which is
                // the low-order part only on big-endian hosts - confirm the intended byte order.
                if(cstr_seekChar(num_str, '.', 0, -1) != -1){
                    f64 f = atof(num_str);
                    List_u8_pushBytes(&ddf->data, &f, 8 - ddf->element_size, ddf->element_size);
                }
                else {
                    i64 i = atoll(num_str);
                    List_u8_pushBytes(&ddf->data, &i, 8 - ddf->element_size, ddf->element_size);
                }
                free(num_str);
                break;
            }
            case TokenType_Char:
            case TokenType_String: {
                bool is_char = tok.type == TokenType_Char;
                // drop the surrounding quotes
                tok.begin += 1;
                tok.length -= 2;
                char* raw = Compiler_extractTokenStr(cmp, tok);
                // empty text has nothing to resolve; handling it here keeps a NULL result
                // from resolveEscapeSequences unambiguous (it then always means an error)
                bool is_empty = raw[0] == '\0';
                u8* processed = is_empty ? NULL : resolveEscapeSequences(cmp, raw);
                free(raw);
                if(!is_empty && processed == NULL)
                    return; // the error was already set by resolveEscapeSequences
                // NOTE(review): strlen relies on resolveEscapeSequences returning a
                // NUL-terminated buffer - confirm.
                i32 len = is_empty ? 0 : (i32)strlen((char*)processed);
                if(is_char && (u32)len != ddf->element_size){
                    setError("can't fit char of size %i in %i bit variable", len, _element_size_bits);
                    free(processed);
                    return;
                }
                if(len > 0)
                    List_u8_pushBytes(&ddf->data, processed, 0, len);
                free(processed);
                break;
            }
            case TokenType_OperationEnd:
                return;
            default:
                setError_unexpectedToken(tok);
                return;
        }
    }
}
@ -31,7 +154,7 @@ bool Compiler_parse(Compiler* cmp){
tok = cmp->tokens.data[cmp->tok_i]; tok = cmp->tokens.data[cmp->tok_i];
switch(tok.type){ switch(tok.type){
case TokenType_Unset: case TokenType_Unset:
returnError("token of undefined type"); returnError(Error_TokenUnset);
case TokenType_SingleLineComment: case TokenType_SingleLineComment:
case TokenType_MultiLineComment: case TokenType_MultiLineComment:
// skip comments // skip comments
@ -42,6 +165,8 @@ bool Compiler_parse(Compiler* cmp){
Section_init(sec, Compiler_extractTokenStr(cmp, tok)); Section_init(sec, Compiler_extractTokenStr(cmp, tok));
break; break;
case TokenType_Instruction: case TokenType_Instruction:
if(sec == NULL)
returnError("no section");
char* instr_name = Compiler_extractTokenStr(cmp, tok); char* instr_name = Compiler_extractTokenStr(cmp, tok);
// data definition starts with const // data definition starts with const
if(cstr_seek(instr_name, "const", 0, 1)){ if(cstr_seek(instr_name, "const", 0, 1)){

View File

@ -13,7 +13,8 @@ typedef enum TokenType {
TokenType_String, // "aaaa" TokenType_String, // "aaaa"
TokenType_Name, // xyz TokenType_Name, // xyz
TokenType_NamedDataPointer, // @xyz TokenType_NamedDataPointer, // @xyz
TokenType_NamedDataSize // #xyz TokenType_NamedDataSize, // #xyz
TokenType_OperationEnd, // EOL or EOF or ;
} TokenType; } TokenType;
cstr TokenType_toString(TokenType t); cstr TokenType_toString(TokenType t);
@ -26,4 +27,4 @@ typedef struct Token {
List_declare(Token); List_declare(Token);
#define Token_construct(TYPE, BEGIN, END) ((Token){ .type = TYPE, .begin = BEGIN, .length = END }) #define Token_construct(TYPE, BEGIN, LEN) ((Token){ .type = TYPE, .begin = BEGIN, .length = LEN })

View File

@ -89,4 +89,32 @@ i32 cstr_seekReverse(const char* src, const char* fragment, u32 startIndex, u32
} }
} }
return -1; return -1;
} }
// Scans `src` forward from `startIndex` for the character `fragment`, examining at
// most `seekLength` characters and stopping once the terminating '\0' has been read.
// Returns the index of the first match, or -1 when there is none.
// Also returns -1 right away when `src` is empty or `fragment` is '\0'.
i32 cstr_seekChar(const char* src, char fragment, u32 startIndex, u32 seekLength){
    if(src[0] == '\0' || fragment == '\0')
        return -1;

    u32 examined = 0;
    while(examined < seekLength){
        u32 pos = startIndex + examined;
        char current = src[pos];
        if(current == fragment)
            return pos;
        // fragment is never '\0' here, so reaching the terminator means no match
        if(current == '\0')
            break;
        examined++;
    }
    return -1;
}
// Scans `src` backward for the character `fragment`.
// startIndex: index of the first character to examine; (u32)-1, or any index past the
//             end of the string, means "start from the last character".
// seekLength: maximum number of characters to examine, counted from startIndex
//             toward the beginning of the string; (u32)-1 means no limit.
// Returns the index of the first match met while moving backward, or -1 when there
// is none. Also returns -1 when `src` is empty or `fragment` is '\0'.
i32 cstr_seekCharReverse(const char* src, char fragment, u32 startIndex, u32 seekLength){
    if(*src == 0 || fragment == 0)
        return -1;

    u32 len = (u32)strlen(src);
    // src is not empty here, so len - 1 is a valid index
    if(startIndex == (u32)-1 || startIndex > len)
        startIndex = len - 1;

    // `examined <= startIndex` stops the scan at index 0 without unsigned wrap-around
    for(u32 examined = 0; examined < seekLength && examined <= startIndex; examined++){
        u32 si = startIndex - examined;
        if(src[si] == fragment)
            return (i32)si;
    }
    return -1;
}

View File

@ -27,6 +27,7 @@ typedef u8 bool;
typedef const char* cstr; typedef const char* cstr;
#define ARRAY_SIZE(A) sizeof(A)/sizeof(A[0]) #define ARRAY_SIZE(A) sizeof(A)/sizeof(A[0])
#define ALIGN_TO(_SIZE,_ALIGN) (((_SIZE) + ((_ALIGN) - 1)) & ~((_ALIGN) - 1))
#define __count_args( \ #define __count_args( \
a0, a1, a2, a3, a4, a5, a6, a7 , a8, a9, a10,a11,a12,a13,a14,a15, \ a0, a1, a2, a3, a4, a5, a6, a7 , a8, a9, a10,a11,a12,a13,a14,a15, \
@ -67,3 +68,13 @@ i32 cstr_seek(const char* src, const char* fragment, u32 startIndex, u32 seekLen
/// @param seekLength 0 ... -1 /// @param seekLength 0 ... -1
/// @return pos of first <fragment> inclusion in <src> or -1 if not found /// @return pos of first <fragment> inclusion in <src> or -1 if not found
i32 cstr_seekReverse(const char* src, const char* fragment, u32 startIndex, u32 seekLength); i32 cstr_seekReverse(const char* src, const char* fragment, u32 startIndex, u32 seekLength);
/// @brief searches <src> forward for the character <fragment>
/// @param startIndex 0 ... src length
/// @param seekLength 0 ... -1 (maximum number of characters to examine)
/// @return pos of first <fragment> inclusion in <src> or -1 if not found;
///         also -1 when <src> is empty or <fragment> is '\0'
i32 cstr_seekChar(const char* src, char fragment, u32 startIndex, u32 seekLength);
/// @brief searches <src> backward for the character <fragment>
/// @param startIndex -1 ... src length (-1 means start from the last character)
/// @param seekLength 0 ... -1
/// @return pos of the first <fragment> met while moving backward from <startIndex>,
///         or -1 if not found; also -1 when <src> is empty or <fragment> is '\0'
i32 cstr_seekCharReverse(const char* src, char fragment, u32 startIndex, u32 seekLength);