parseDataDefinition

Timerix 2025-01-27 09:40:49 +05:00
parent e43a987e1e
commit 46e5eb1887
8 changed files with 311 additions and 15 deletions

View File

@@ -32,10 +32,9 @@ List_declare(Operation);
typedef struct DataDefinition {
u32 element_size;
u32 count;
cstr name;
void* data;
List_u8 data;
u32 element_size;
} DataDefinition;
List_declare(DataDefinition);

View File

@@ -139,7 +139,7 @@ static void readArguments(Compiler* cmp){
}
}
// end of line
// end of operation
else if(c == '\r' || c == '\n' || c == ';'){
tok.length = cmp->pos - tok.begin;
if(tok.length > 0)
@@ -195,6 +195,8 @@ static void readInstruction(Compiler* cmp){
if(c == '\r' || c == '\n' || c == ';'){
tok.length = cmp->pos - tok.begin;
List_Token_push(&cmp->tokens, tok);
tok = Token_construct(TokenType_OperationEnd, cmp->pos, 1);
List_Token_push(&cmp->tokens, tok);
// cmp->line will be increased in lex()
return;
}
@@ -204,6 +206,8 @@ static void readInstruction(Compiler* cmp){
tok.length = cmp->pos - tok.begin;
List_Token_push(&cmp->tokens, tok);
readArguments(cmp);
tok = Token_construct(TokenType_OperationEnd, cmp->pos, 1);
List_Token_push(&cmp->tokens, tok);
return;
}
@@ -219,6 +223,8 @@ static void readInstruction(Compiler* cmp){
// end of file
tok.length = cmp->pos - tok.begin;
List_Token_push(&cmp->tokens, tok);
tok = Token_construct(TokenType_OperationEnd, cmp->pos, 1);
List_Token_push(&cmp->tokens, tok);
}
bool Compiler_lex(Compiler* cmp){

View File

@@ -7,13 +7,136 @@
#define setError_unexpectedToken(T) {\
char* tok_str = Compiler_extractTokenStr(cmp, T);\
setError("unexpected character '%s'", tok_str);\
cmp->pos = T.begin;\
Compiler_setError(cmp, "unexpected token '%s'", tok_str);\
free(tok_str);\
}
#define setError_unexpectedTokenChar(T, I) {\
cmp->pos = T.begin + I;\
Compiler_setError(cmp, "unexpected token '%c'", cmp->code[cmp->pos]);\
}
static void parseDataDefinition(Compiler* cmp, char* instr_name, DataDefinition* dataDefPtr){
#define Error_TokenUnset "token of undefined type"
#define Error_BitSize "invalid size in bits"
static void List_u8_pushBytes(List_u8* l, void* value, u32 startIndex, u32 count){
u8* v = value;
for(u32 byte_i = startIndex; byte_i < startIndex + count; byte_i++){
List_u8_push(l, v[byte_i]);
}
}
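
To make the byte copying above concrete, here is a minimal self-contained sketch of the same idea with a plain array standing in for List_u8 (the buffer and value names are invented for illustration): List_u8_pushBytes appends bytes [startIndex, startIndex + count) of the value's in-memory representation, so on a little-endian host index 0 is the least significant byte.

#include <stdint.h>
#include <stdio.h>
#include <string.h>

int main(void){
    uint8_t buf[16];                 // stand-in for List_u8
    uint32_t buf_len = 0;
    int64_t value = 0x13A2F;         // 80431
    uint32_t startIndex = 0, count = 2;
    // same effect as List_u8_pushBytes(&list, &value, startIndex, count)
    memcpy(buf + buf_len, (uint8_t*)&value + startIndex, count);
    buf_len += count;
    for(uint32_t i = 0; i < buf_len; i++)
        printf("%02X ", buf[i]);     // little-endian host: 2F 3A
    printf("\n");
    return 0;
}
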
static inline bool isVarSizeBits(u32 B) { return (B == 8 || B == 16 || B == 32 || B == 64); }
static NULLABLE(u8*) resolveEscapeSequences(Compiler* cmp, cstr src){
u32 len = strlen(src);
List_u8 resolved = List_u8_alloc(len);
char c;
bool escaped = false;
for(u32 i = 0; i < len; i++){
c = src[i];
if(c == '\\' && !escaped){
escaped = true;
continue;
}
if(!escaped){
List_u8_push(&resolved, c);
continue;
}
// escape codes (consume exactly one escaped character)
escaped = false;
switch(c){
case '\\':
List_u8_push(&resolved, '\\');
break;
case '0':
List_u8_push(&resolved, '\0');
break;
case 'n':
List_u8_push(&resolved, '\n');
break;
case 'r':
List_u8_push(&resolved, '\r');
break;
case 't':
List_u8_push(&resolved, '\t');
break;
case 'e':
List_u8_push(&resolved, '\e');
break;
default:
setError_unexpectedTokenChar(cmp->tokens.data[cmp->tok_i], i);
free(resolved.data);
return NULL;
}
}
List_u8_push(&resolved, '\0'); // terminate so callers can take strlen() of the result
return resolved.data;
}
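
A hedged usage sketch of the resolver (the literal and the surrounding setup are made up; cmp is assumed to be an initialized Compiler whose current token covers the literal):

// raw token text in the source:             Hi\n
// bytes produced by resolveEscapeSequences: 'H' 'i' 0x0A
u8* bytes = resolveEscapeSequences(cmp, "Hi\\n");
if(bytes == NULL){ /* unknown escape code, error already set on cmp */ }
else { /* use bytes, then free(bytes) */ }
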
static void parseDataDefinition(Compiler* cmp, char* instr_name, DataDefinition* ddf){
i32 _element_size_bits;
if(sscanf(instr_name, "const%i", &_element_size_bits) != 1 || !isVarSizeBits(_element_size_bits)){
setError(Error_BitSize);
return;
}
ddf->element_size = _element_size_bits / 8;
Token tok = cmp->tokens.data[++cmp->tok_i];
char* tok_str = Compiler_extractTokenStr(cmp, tok);
u8* processed_str = NULL;
i32 len = 0;
ddf->name = tok_str;
while(++cmp->tok_i < cmp->tokens.len){
tok = cmp->tokens.data[cmp->tok_i];
switch(tok.type){
case TokenType_Unset:
setError(Error_TokenUnset);
return;
case TokenType_SingleLineComment:
case TokenType_MultiLineComment:
// skip comments
break;
case TokenType_Number:
tok_str = Compiler_extractTokenStr(cmp, tok);
if(cstr_seekChar(tok_str, '.', 0, -1) != -1){
f64 f = atof(tok_str);
List_u8_pushBytes(&ddf->data, &f, 8 - ddf->element_size, ddf->element_size);
}
else {
i64 i = atoll(tok_str);
List_u8_pushBytes(&ddf->data, &i, 8 - ddf->element_size, ddf->element_size);
}
break;
case TokenType_Char:
tok.begin += 1;
tok.length -= 2;
tok_str = Compiler_extractTokenStr(cmp, tok);
processed_str = resolveEscapeSequences(cmp, tok_str);
free(tok_str);
if(processed_str == NULL)
return;
len = strlen((char*)processed_str);
if(len != ddf->element_size){
setError("can't fit a char literal of %i bytes into a %i-bit variable", len, _element_size_bits);
return;
}
List_u8_pushBytes(&ddf->data, processed_str, 0, len);
break;
case TokenType_String:
tok.begin += 1;
tok.length -= 2;
tok_str = Compiler_extractTokenStr(cmp, tok);
processed_str = resolveEscapeSequences(cmp, tok_str);
free(tok_str);
if(processed_str == NULL)
return;
len = strlen((char*)processed_str);
List_u8_pushBytes(&ddf->data, processed_str, 0, len);
break;
case TokenType_OperationEnd:
return;
default:
setError_unexpectedToken(tok);
return;
}
}
}
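
Putting the pieces together, a data definition line is consumed roughly as sketched below (a hedged illustration; the `const8 greeting ...` source line and its names are invented, not taken from the repository):

// source line (hypothetical):  const8 greeting "Hi!" 10 0
// instr_name "const8"   -> sscanf reads 8, element_size = 8 / 8 = 1 byte
// name token            -> ddf->name = "greeting"
// string "Hi!"          -> bytes 'H' 'i' '!' appended to ddf->data
// numbers 10 and 0      -> element_size bytes appended per value
// end of line or ';'    -> TokenType_OperationEnd ends the definition
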
@@ -31,7 +154,7 @@ bool Compiler_parse(Compiler* cmp){
tok = cmp->tokens.data[cmp->tok_i];
switch(tok.type){
case TokenType_Unset:
returnError("token of undefined type");
returnError(Error_TokenUnset);
case TokenType_SingleLineComment:
case TokenType_MultiLineComment:
// skip comments
@@ -42,6 +165,8 @@ bool Compiler_parse(Compiler* cmp){
Section_init(sec, Compiler_extractTokenStr(cmp, tok));
break;
case TokenType_Instruction:
if(sec == NULL)
returnError("no section");
char* instr_name = Compiler_extractTokenStr(cmp, tok);
// data definition starts with const
if(cstr_seek(instr_name, "const", 0, 1)){

View File

@@ -13,7 +13,8 @@ typedef enum TokenType {
TokenType_String, // "aaaa"
TokenType_Name, // xyz
TokenType_NamedDataPointer, // @xyz
TokenType_NamedDataSize // #xyz
TokenType_NamedDataSize, // #xyz
TokenType_OperationEnd, // EOL or EOF or ;
} TokenType;
cstr TokenType_toString(TokenType t);
@@ -26,4 +27,4 @@ typedef struct Token {
List_declare(Token);
#define Token_construct(TYPE, BEGIN, END) ((Token){ .type = TYPE, .begin = BEGIN, .length = END })
#define Token_construct(TYPE, BEGIN, LEN) ((Token){ .type = TYPE, .begin = BEGIN, .length = LEN })

View File

@@ -90,3 +90,31 @@ i32 cstr_seekReverse(const char* src, const char* fragment, u32 startIndex, u32
}
return -1;
}
i32 cstr_seekChar(const char* src, char fragment, u32 startIndex, u32 seekLength){
char sc=*src;
if(sc==0 || fragment==0)
return -1;
for(u32 si=startIndex; si-startIndex<seekLength && sc!=0; si++){
sc=src[si];
if(sc==fragment)
return si;
}
return -1;
}
i32 cstr_seekCharReverse(const char* src, char fragment, u32 startIndex, u32 seekLength){
char sc=*src;
if(sc==0 || fragment==0)
return -1;
i32 len=strlen(src);
if(startIndex==(u32)-1)
startIndex=len-1;
for(u32 si=startIndex; si<(u32)-1 && si!=len-1-seekLength; si--){
sc=src[si];
if(sc==fragment)
return si;
}
return -1;
}
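
A small usage sketch for the two new helpers (assuming the declarations added to utils.h below are included; the path literal is made up):

cstr path = "a/b/c.txt";
i32 first_slash = cstr_seekChar(path, '/', 0, (u32)-1);              // returns 1
i32 last_slash  = cstr_seekCharReverse(path, '/', (u32)-1, (u32)-1); // returns 3
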

View File

@@ -27,6 +27,7 @@ typedef u8 bool;
typedef const char* cstr;
#define ARRAY_SIZE(A) sizeof(A)/sizeof(A[0])
#define ALIGN_TO(_SIZE,_ALIGN) (((_SIZE) + ((_ALIGN) - 1)) & ~((_ALIGN) - 1))
#define __count_args( \
a0, a1, a2, a3, a4, a5, a6, a7 , a8, a9, a10,a11,a12,a13,a14,a15, \
@@ -67,3 +68,13 @@ i32 cstr_seek(const char* src, const char* fragment, u32 startIndex, u32 seekLen
/// @param seekLength 0 ... -1
/// @return pos of first <fragment> inclusion in <src> or -1 if not found
i32 cstr_seekReverse(const char* src, const char* fragment, u32 startIndex, u32 seekLength);
/// @param startIndex 0 ... src length
/// @param seekLength 0 ... -1
/// @return pos of first <fragment> inclusion in <src> or -1 if not found
i32 cstr_seekChar(const char* src, char fragment, u32 startIndex, u32 seekLength);
/// @param startIndex -1 ... src length
/// @param seekLength 0 ... -1
/// @return pos of the last <fragment> inclusion in <src> or -1 if not found
i32 cstr_seekCharReverse(const char* src, char fragment, u32 startIndex, u32 seekLength);