Compare commits

..

2 Commits

Author SHA1 Message Date
2831474f79 str and StringBuilder 2025-02-02 00:48:34 +05:00
2d3e66dd38 rename fix 2025-01-28 15:18:20 +03:00
13 changed files with 163 additions and 327 deletions

View File

@ -12,9 +12,9 @@
}\
\
static inline Array_##T Array_##T##_alloc(u32 len){\
return Array_##T##_construct(malloc(len * sizeof(T)), len);\
return Array_##T##_construct((T*)malloc(len * sizeof(T)), len);\
}\
static inline void Array_##T##_realloc(Array_##T* ptr, u32 new_len){\
ptr->data = realloc(ptr->data, new_len * sizeof(T));\
ptr->data = (T*)realloc(ptr->data, new_len * sizeof(T));\
ptr->len = new_len;\
}

View File

@ -1,6 +1,9 @@
#pragma once
#include "../std.h"
// minimal max_len after initial (0)
#define __List_min_size 16
#define List_declare(T)\
typedef struct List_##T {\
T* data;\
@ -14,35 +17,42 @@
\
List_##T List_##T##_alloc(u32 initial_len);\
\
T* List_##T##_expand(List_##T* ptr);\
T* List_##T##_expand(List_##T* ptr, u32 count);\
void List_##T##_push(List_##T* ptr, T value);\
void List_##T##_pushMany(List_##T* ptr, T* values, u32 count);\
#define List_define(T)\
List_##T List_##T##_alloc(u32 initial_len){\
if(initial_len == 0)\
return List_##T##_construct((T*)NULL, 0, 0);\
u32 max_Len = ALIGN_TO(initial_len, sizeof(void*)/sizeof(T));\
u32 max_len = ALIGN_TO(initial_len, sizeof(void*)/sizeof(T));\
/* branchless version of max(max_len, __List_min_size) */\
max_len += (max_len < __List_min_size) * (__List_min_size - max_len);\
return List_##T##_construct((T*)malloc(initial_len * sizeof(T)), 0, max_len);\
}\
\
T* List_##T##_expand(List_##T* ptr){\
if(ptr->len == ptr->max_len){\
ptr->max_len *= 2;\
ptr->data = (T*)realloc(ptr->data, ptr->max_len * sizeof(T));\
ptr->max_len = max_len;\
T* List_##T##_expand(List_##T* ptr, u32 count){\
u32 occupied_len = ptr->len;\
u32 expanded_max_len = ptr->max_len;\
ptr->len += count;\
while(ptr->len > ptr->max_len){\
expanded_max_len *= 2;\
}\
return &ptr->data[ptr->len++];\
ptr->data = (T*)realloc(ptr->data, expanded_max_len * sizeof(T));\
return ptr->data + occupied_len;\
}\
\
void List_##T##_push(List_##T* ptr, T value){\
T* empty_cell_ptr = List_##T##_expand(ptr);\
T* empty_cell_ptr = List_##T##_expand(ptr, 1);\
*empty_cell_ptr = value;\
}\
\
void List_##T##_pushMany(List_##T* ptr, T* values, u32 count){\
T* empty_cell_ptr = List_##T##_expand(ptr, count);\
memcpy(empty_cell_ptr, values, count * sizeof(T));\
}\
#define __List_min_size 16
List_declare(cstr);
List_declare(u32);

View File

@ -3,6 +3,7 @@
List_define(Argument);
List_define(Operation);
List_define(DataDefinition);
List_define(Section);
static cstr _ArgumentType_str[] = {
"Unset",

View File

@ -85,7 +85,7 @@ static void parseDataDefinition(Compiler* cmp, char* instr_name, DataDefinition*
Token tok = cmp->tokens.data[++cmp->tok_i];
char* tok_str = Compiler_extractTokenStr(cmp, tok);
u8* processed_str = NULL;
i32 len = 0;
u32 len = 0;
ddf->name = tok_str;
while(++cmp->tok_i < cmp->tokens.len){
@ -114,7 +114,7 @@ static void parseDataDefinition(Compiler* cmp, char* instr_name, DataDefinition*
tok_str = Compiler_extractTokenStr(cmp, tok);
processed_str = resolveEscapeSequences(cmp, tok_str);
free(tok_str);
len = strlen(processed_str);
len = strlen((char*)processed_str);
if(len != ddf->element_size){
setError("can't fit char of size %i in %u bit variable", len, _element_size_bits);
return;
@ -127,7 +127,7 @@ static void parseDataDefinition(Compiler* cmp, char* instr_name, DataDefinition*
tok_str = Compiler_extractTokenStr(cmp, tok);
processed_str = resolveEscapeSequences(cmp, tok_str);
free(tok_str);
len = strlen(processed_str);
len = strlen((char*)processed_str);
List_u8_pushBytes(&ddf->data, processed_str, 0, len);
break;
case TokenType_OperationEnd:
@ -161,7 +161,7 @@ bool Compiler_parse(Compiler* cmp){
break;
case TokenType_Label:
// create new section
sec = List_Section_expand(&cmp->ast.sections);
sec = List_Section_expand(&cmp->ast.sections, 1);
Section_init(sec, Compiler_extractTokenStr(cmp, tok));
break;
case TokenType_Instruction:
@ -170,11 +170,11 @@ bool Compiler_parse(Compiler* cmp){
char* instr_name = Compiler_extractTokenStr(cmp, tok);
// data definition starts with const
if(cstr_seek(instr_name, "const", 0, 1)){
DataDefinition* dataDefPtr = List_DataDefinition_expand(&sec->data);
DataDefinition* dataDefPtr = List_DataDefinition_expand(&sec->data, 1);
parseDataDefinition(cmp, instr_name, dataDefPtr);
}
else {
Operation* operPtr = List_Operation_expand(&sec->code);
Operation* operPtr = List_Operation_expand(&sec->code, 1);
parseOperation(cmp, instr_name, operPtr);
}
break;

View File

@ -1,34 +0,0 @@
#pragma once
#include "../std.h"
#include "../collections/List.h"
#include "Token.h"
#include "AST.h"
/// Processing stage of a Compiler; stored in Compiler.state.
typedef enum CompilerState {
CompilerState_Initial,
CompilerState_Lexing, // Compiler_parse requires this state on entry
CompilerState_Parsing, // set by Compiler_parse once it starts
CompilerState_Compiling,
CompilerState_Error, // Compiler_parse returns false when it observes this state
CompilerState_Success
} CompilerState;
/// State shared by the compilation stages (see Compiler_init / Compiler_free).
typedef struct Compiler {
char* code; // source text; Token.begin values index into this buffer
u32 code_len; // presumably the length of `code` — TODO confirm
u32 column; // > 0 if code parsing started
u32 pos; // index in `code`; the parser's error macros set it to the offending token before reporting
CompilerState state; // current stage
NULLABLE(char* error_message); // to be checked by the caller when compilation fails
List_Token tokens; // token list read by the parser
List_u32 line_lengths; // presumably per-line lengths used by Compiler_getLineAndColumn — TODO confirm
AST ast; // parser output; sections are appended to ast.sections
u32 tok_i; // index in `tokens` of the token the parser is looking at
} Compiler;
void Compiler_init(Compiler* cmp);
void Compiler_free(Compiler* cmp);
/// @brief compile assembly language code to machine code
/// @return true if no errors, false if any error occurred (check cmp->error_message)
bool Compiler_compile(Compiler* cmp, cstr source_file_name, cstr out_file_name, bool debug);

View File

@ -1,29 +0,0 @@
#include "Compiler.h"
void _Compiler_setError(Compiler* cmp, cstr context, cstr format, ...) __attribute__((__format__(__printf__, 3, 4)));
#define Compiler_setError(cmp, format, ...) _Compiler_setError(cmp, __func__, format ,##__VA_ARGS__)
// Reports an error through setError and makes the enclosing function return false.
// setError is not defined in this header; the including source file has to provide it.
// NOTE(review): expands to a bare { } block, so `if(x) returnError(...); else ...` will not compile.
#define returnError(FORMAT, ...) {\
setError(FORMAT, ##__VA_ARGS__);\
return false;\
}
// Same as returnError, but only when STATEMENT evaluates to true.
#define returnErrorIf(STATEMENT, FORMAT, ...) if(STATEMENT) returnError(FORMAT, ##__VA_ARGS__)
// Same as returnErrorIf, using the text of the condition itself as the error message.
// NOTE(review): that text is passed as the format string, so a condition containing '%' would be misread.
#define returnErrorIf_auto(STATEMENT) returnErrorIf(STATEMENT, #STATEMENT)
/// Line/column pair, the result type of Compiler_getLineAndColumn.
typedef struct CodePos {
u32 line; // 0 on error
u32 column; // 0 on error
} CodePos;
/// Builds a CodePos value with line L and column C.
#define CodePos_create(L, C) ((CodePos){ .line = L, .column = C })
/// @param pos index in code buffer
CodePos Compiler_getLineAndColumn(Compiler* cmp, u32 pos);
char* Compiler_extractTokenStr(Compiler* cmp, Token t);
bool Compiler_lex(Compiler* cmp);
bool Compiler_parse(Compiler* cmp);

View File

@ -1,193 +0,0 @@
#include "Compiler_internal.h"
// All three macros expect a `Compiler* cmp` variable in the scope where they are used.

// Points cmp->pos at the beginning of the current token (cmp->tok_i) and reports a formatted error.
#define setError(FORMAT, ...) {\
cmp->pos = cmp->tokens.data[cmp->tok_i].begin;\
Compiler_setError(cmp, FORMAT, ##__VA_ARGS__);\
}
// Reports "unexpected token '<text>'" for token T, positioned at the token's beginning.
// The extracted token text is freed after the message has been produced.
#define setError_unexpectedToken(T) {\
char* tok_str = Compiler_extractTokenStr(cmp, T);\
cmp->pos = T.begin;\
Compiler_setError(cmp, "unexpected token '%s'", tok_str);\
free(tok_str);\
}
// Reports "unexpected token '<char>'" for the character at offset I from the beginning of token T.
#define setError_unexpectedTokenChar(T, I) {\
cmp->pos = T.begin + I;\
Compiler_setError(cmp, "unexpected token '%c'", cmp->code[cmp->pos]);\
}
#define Error_TokenUnset "token of undefined type"
#define Error_BitSize "invalid size in bits"
/// Appends `count` bytes taken from `value`, beginning at byte offset `startIndex`,
/// to the end of list `l`. Bytes are pushed one at a time in ascending address order.
static void List_u8_pushBytes(List_u8* l, void* value, u32 startIndex, u32 count){
    u8* bytes = value;
    u32 i = startIndex;
    while(i < startIndex + count){
        List_u8_push(l, bytes[i]);
        i++;
    }
}
/// Tells whether B is a supported variable width in bits.
/// @return true for 8, 16, 32 and 64; false for every other value
static inline bool isVarSizeBits(u32 B) {
    // B can match at most one width, so the alternatives have to be OR-ed together
    return B == 8 || B == 16 || B == 32 || B == 64;
}
/// Replaces backslash escape sequences in `src` with the bytes they stand for.
/// Supported sequences: \\  \0  \n  \r  \t  \e
/// @param cmp compiler whose current token (cmp->tok_i) is used to position error reports
/// @param src zero-terminated text to process
/// @return heap buffer the caller has to free(). It always ends with '\0', so it can be
///         measured with strlen(); note that a \0 escape puts an earlier NUL into the data.
///         NULL when an unknown or unfinished escape sequence is found
///         (the error is reported through cmp before returning).
static NULLABLE(u8*) resolveEscapeSequences(Compiler* cmp, cstr src){
    u32 len = strlen(src);
    // the result is never longer than src; one extra cell holds the terminator
    List_u8 resolved = List_u8_alloc(len + 1);
    bool escaped = false;
    for(u32 i = 0; i < len; i++){
        char c = src[i];
        if(!escaped){
            if(c == '\\')
                escaped = true;
            else List_u8_push(&resolved, c);
            continue;
        }

        // c follows a backslash; the escape sequence ends with this character
        escaped = false;
        switch(c){
            case '\\':
                List_u8_push(&resolved, '\\');
                break;
            case '0':
                List_u8_push(&resolved, '\0');
                break;
            case 'n':
                List_u8_push(&resolved, '\n');
                break;
            case 'r':
                List_u8_push(&resolved, '\r');
                break;
            case 't':
                List_u8_push(&resolved, '\t');
                break;
            case 'e':
                // ESC character; '\e' itself is a compiler extension, so spell the code out
                List_u8_push(&resolved, '\x1b');
                break;
            default:
                setError_unexpectedTokenChar(cmp->tokens.data[cmp->tok_i], i);
                free(resolved.data);
                return NULL;
        }
    }

    if(escaped){
        // src ends with a backslash that escapes nothing
        setError_unexpectedTokenChar(cmp->tokens.data[cmp->tok_i], len - 1);
        free(resolved.data);
        return NULL;
    }

    List_u8_push(&resolved, '\0');
    return resolved.data;
}
/// Parses one data definition: an instruction named const<bits>, followed by the
/// definition name and then by values (numbers, chars, strings) up to the end of the operation.
/// On entry cmp->tok_i is the index of the instruction token. On a normal return it is the
/// index of the TokenType_OperationEnd token, or cmp->tokens.len if the tokens ran out.
/// Errors are reported through cmp (setError); the function then returns early.
/// @param instr_name instruction text, only read here (the caller keeps ownership)
/// @param ddf definition to fill: element_size and name are assigned, values are appended to data.
///            ddf->name receives a string produced by Compiler_extractTokenStr and owns it from then on.
static void parseDataDefinition(Compiler* cmp, char* instr_name, DataDefinition* ddf){
    i32 _element_size_bits;
    if(sscanf(instr_name, "const%i", &_element_size_bits) != 1 || !isVarSizeBits(_element_size_bits)){
        setError(Error_BitSize);
        return;
    }
    ddf->element_size = _element_size_bits / 8;

    // the name token has to exist before it is read;
    // tok_i still points at the instruction here, so setError has a valid position
    if(cmp->tok_i + 1 >= cmp->tokens.len){
        setError("data definition has no name");
        return;
    }
    Token tok = cmp->tokens.data[++cmp->tok_i];
    ddf->name = Compiler_extractTokenStr(cmp, tok);

    while(++cmp->tok_i < cmp->tokens.len){
        // look at the token of this iteration, not at the name token
        tok = cmp->tokens.data[cmp->tok_i];
        char* tok_str = NULL;
        u8* processed_str = NULL;
        i32 len = 0;

        switch(tok.type){
            case TokenType_Unset:
                setError(Error_TokenUnset);
                return;
            case TokenType_SingleLineComment:
            case TokenType_MultiLineComment:
                // skip comments
                break;
            case TokenType_Number:
                tok_str = Compiler_extractTokenStr(cmp, tok);
                // NOTE(review): both branches copy the last element_size bytes of the 8-byte value
                // as it lies in memory; which bytes those are depends on host byte order — confirm
                if(cstr_seekChar(tok_str, '.', 0, -1) != -1){
                    f64 f = atof(tok_str);
                    List_u8_pushBytes(&ddf->data, &f, 8 - ddf->element_size, ddf->element_size);
                }
                else {
                    i64 i = atoll(tok_str);
                    List_u8_pushBytes(&ddf->data, &i, 8 - ddf->element_size, ddf->element_size);
                }
                free(tok_str);
                break;
            case TokenType_Char:
                // drop the surrounding quotes
                tok.begin += 1;
                tok.length -= 2;
                tok_str = Compiler_extractTokenStr(cmp, tok);
                processed_str = resolveEscapeSequences(cmp, tok_str);
                free(tok_str);
                if(processed_str == NULL)
                    return; // resolveEscapeSequences has already reported the error
                len = strlen((char*)processed_str);
                if(len != ddf->element_size){
                    setError("can't fit char of size %i in %u bit variable", len, _element_size_bits);
                    free(processed_str);
                    return;
                }
                List_u8_pushBytes(&ddf->data, processed_str, 0, len);
                free(processed_str);
                break;
            case TokenType_String:
                // drop the surrounding quotes
                tok.begin += 1;
                tok.length -= 2;
                tok_str = Compiler_extractTokenStr(cmp, tok);
                processed_str = resolveEscapeSequences(cmp, tok_str);
                free(tok_str);
                if(processed_str == NULL)
                    return; // resolveEscapeSequences has already reported the error
                len = strlen((char*)processed_str);
                List_u8_pushBytes(&ddf->data, processed_str, 0, len);
                free(processed_str);
                break;
            case TokenType_OperationEnd:
                return;
            default:
                setError_unexpectedToken(tok);
                return;
        }
    }
}
/// Placeholder called by Compiler_parse for every instruction that is not a data definition.
/// NOTE(review): the body is empty — no tokens are consumed and operPtr is left as it was passed in.
static void parseOperation(Compiler* cmp, char* instr_name, Operation* operPtr){
}
/// Parser stage: walks cmp->tokens starting at cmp->tok_i and fills cmp->ast.
/// A label token opens a new section; instruction tokens add a data definition
/// or an operation to the section opened last.
/// Requires cmp->state == CompilerState_Lexing and switches it to CompilerState_Parsing.
/// @return true when every token was consumed, false when an error was reported
bool Compiler_parse(Compiler* cmp){
    returnErrorIf_auto(cmp->state != CompilerState_Lexing);
    cmp->state = CompilerState_Parsing;
    Token tok;
    Section* sec = NULL;
    while(cmp->tok_i < cmp->tokens.len){
        tok = cmp->tokens.data[cmp->tok_i];
        switch(tok.type){
            case TokenType_Unset:
                returnError(Error_TokenUnset);
            case TokenType_SingleLineComment:
            case TokenType_MultiLineComment:
                // skip comments
                break;
            case TokenType_Label:
                // create new section
                sec = List_Section_expand(&cmp->ast.sections);
                Section_init(sec, Compiler_extractTokenStr(cmp, tok));
                break;
            case TokenType_Instruction: {
                if(sec == NULL)
                    returnError("no section");
                char* instr_name = Compiler_extractTokenStr(cmp, tok);
                // data definition starts with const
                if(cstr_seek(instr_name, "const", 0, 1)){
                    DataDefinition* dataDefPtr = List_DataDefinition_expand(&sec->data);
                    parseDataDefinition(cmp, instr_name, dataDefPtr);
                }
                else {
                    Operation* operPtr = List_Operation_expand(&sec->code);
                    parseOperation(cmp, instr_name, operPtr);
                }
                // the helpers only read the name, so it is released here on success and on error
                free(instr_name);
                break;
            }
            default:
                setError_unexpectedToken(tok);
                return false;
        }

        // the helpers report failures through the compiler state
        if(cmp->state == CompilerState_Error)
            return false;
        cmp->tok_i++;
    }
    return true;
}

View File

@ -1,23 +0,0 @@
#include "Token.h"
List_define(Token);
// Printable names of token types, indexed by TokenType value.
// One entry per enumerator, in declaration order.
static cstr _TokenType_str[] = {
    "Unset",
    "SingleLineComment",
    "MultiLineComment",
    "Instruction",
    "Label",
    "Number",
    "Char",
    "String",
    "Name",
    "NamedDataPointer",
    "NamedDataSize",
    "OperationEnd"
};
/// Looks up the printable name of token type `t`.
/// @return the matching entry of _TokenType_str, or "!!INDEX_ERROR!!" when `t` has no entry
cstr TokenType_toString(TokenType t){
    return (t < ARRAY_SIZE(_TokenType_str))
        ? _TokenType_str[t]
        : "!!INDEX_ERROR!!";
}

View File

@ -1,30 +0,0 @@
#pragma once
#include "../std.h"
#include "../collections/List.h"
/// Kinds of tokens; stored in Token.type.
/// TokenType_toString uses these values as indexes into its name table,
/// so the table has to list one name per enumerator in this order.
typedef enum TokenType {
TokenType_Unset, // initial value
TokenType_SingleLineComment, // //comment
TokenType_MultiLineComment, // /* comment */
TokenType_Instruction, // abc
TokenType_Label, // .abc:
TokenType_Number, // 0123
TokenType_Char, // 'A'
TokenType_String, // "aaaa"
TokenType_Name, // xyz
TokenType_NamedDataPointer, // @xyz
TokenType_NamedDataSize, // #xyz
TokenType_OperationEnd, // EOL or EOF or ;
} TokenType;
cstr TokenType_toString(TokenType t);
/// A span of the source text together with its type.
/// `length` and `type` are bit-fields of 24 and 8 bits.
typedef struct Token {
u32 begin; // some index in Compiler->code
u32 length : 24; // length in characters (24 bits)
TokenType type : 8; // type of token (8 bits)
} Token;
List_declare(Token);
#define Token_construct(TYPE, BEGIN, LEN) ((Token){ .type = TYPE, .begin = BEGIN, .length = LEN })

View File

@ -0,0 +1,43 @@
#include "StringBuilder.h"
/// Releases the memory held by the builder.
/// Any str obtained from StringBuilder_build points into that memory and must not be used afterwards.
/// The buffer is reset to the empty state, so calling this function again is harmless.
void StringBuilder_free(StringBuilder* b){
    free(b->buffer.data);
    // do not keep a dangling pointer or stale sizes around
    b->buffer.data = NULL;
    b->buffer.len = 0;
    b->buffer.max_len = 0;
}
/// Appends '\0' to the buffer and returns the accumulated text as a zero-terminated str.
/// The terminator stays in the buffer but is not counted in the returned length.
/// No copy is made: the result points into the builder's buffer.
/// NOTE(review): StringBuilder.h declares StringBuilder_getStr, not StringBuilder_build — confirm the intended name.
str StringBuilder_build(StringBuilder* b){
    List_u8_push(&b->buffer, '\0');
    // read the pointer only after the push, which may have moved the buffer
    char* text = (char*)b->buffer.data;
    u32 text_len = b->buffer.len - 1;
    return str_construct(text, text_len, true);
}
/// Appends the single character `c` to the end of the builder's buffer.
void StringBuilder_append_char(StringBuilder* b, char c){
List_u8_push(&b->buffer, c);
}
/// Appends the s.len bytes of `s` to the end of the builder's buffer.
/// An empty or NULL string (such as str_null) is accepted and appends nothing.
void StringBuilder_append_string(StringBuilder* b, str s){
    // nothing to copy; this also keeps a NULL data pointer away from the memory copy
    if(s.data == NULL || s.len == 0)
        return;
    List_u8_pushMany(&b->buffer, (u8*)s.data, s.len);
}
/// Appends the zero-terminated string `s` (without its terminator) to the builder.
/// The length is measured with strlen, so `s` must not be NULL.
void StringBuilder_append_cstr(StringBuilder* b, char* s){
    str view = str_construct(s, strlen(s), true);
    StringBuilder_append_string(b, view);
}
/// Appends the decimal representation of the signed integer `n` (with '-' for negative values).
void StringBuilder_append_i64(StringBuilder* b, i64 n){
    // 32 bytes are enough: a 64-bit value has at most 19 digits plus sign and terminator
    char buf[32];
    // signed conversion; the cast makes the argument match %lld whatever type i64 is defined as
    sprintf_s(buf, sizeof(buf), "%lld", (long long)n);
    StringBuilder_append_cstr(b, buf);
}
/// Appends the decimal representation of the unsigned integer `n`.
void StringBuilder_append_u64(StringBuilder* b, u64 n){
// a 64-bit value has at most 20 decimal digits, so 32 bytes are enough
char buf[32];
// NOTE(review): %llu expects unsigned long long — confirm that u64 is defined as that type
sprintf_s(buf, sizeof(buf), "%llu", n);
StringBuilder_append_cstr(b, buf);
}
/// Appends `n` in fixed-point notation with six digits after the decimal point (printf "%lf").
void StringBuilder_append_f64(StringBuilder* b, f64 n){
    // %lf never switches to exponent form: the largest finite double prints 309 digits
    // before the point, so sign + 309 + '.' + 6 + terminator have to fit
    char buf[352];
    sprintf_s(buf, sizeof(buf), "%lf", n);
    StringBuilder_append_cstr(b, buf);
}

View File

@ -0,0 +1,32 @@
#pragma once
#if __cplusplus
extern "C" {
#endif
#include "../collections/List.h"
#include "str.h"
/// Accumulates text piece by piece in a growable byte list.
typedef struct StringBuilder {
List_u8 buffer; // bytes appended so far
} StringBuilder;
/// Creates a builder whose buffer comes from List_u8_alloc(initial_size).
/// Release it with StringBuilder_free.
static inline StringBuilder StringBuilder_construct(u32 initial_size) {
    StringBuilder builder;
    builder.buffer = List_u8_alloc(initial_size);
    return builder;
}
void StringBuilder_free(StringBuilder* b);
void StringBuilder_removeFromEnd(StringBuilder* b, u32 count);
void StringBuilder_append_char(StringBuilder* b, char c);
void StringBuilder_append_cstr(StringBuilder* b, char* s);
void StringBuilder_append_string(StringBuilder* b, str s);
void StringBuilder_append_i64(StringBuilder* b, i64 a);
void StringBuilder_append_u64(StringBuilder* b, u64 a);
void StringBuilder_append_f64(StringBuilder* b, f64 a);
// adds '\0' to the buffer and returns pointer to buffer content
str StringBuilder_getStr(StringBuilder* b);
#if __cplusplus
}
#endif

29
src/string/str.c Normal file
View File

@ -0,0 +1,29 @@
#include "str.h"
/// Copies the content of `src` into newly allocated memory and appends '\0'.
/// @return zero-terminated copy that the caller has to free();
///         `src` itself (no allocation) when src.data is NULL or src.len is 0;
///         a NULL string of length 0 when the allocation fails
str str_copy(str src){
    if(src.data == NULL || src.len == 0)
        return src;

    char* copy = (char*)malloc(src.len + 1);
    if(copy == NULL)
        return str_construct(NULL, 0, false);

    memcpy(copy, src.data, src.len);
    copy[src.len] = '\0';
    return str_construct(copy, src.len, true);
}
/// Checks two strings for equality, byte by byte.
/// Strings of different length are never equal. A NULL data pointer is equal
/// only to another NULL data pointer (of the same length).
/// @return true when the strings are equal
bool str_compare(str str0, str str1){
    if(str0.len != str1.len)
        return false;

    // with a NULL on either side the strings match only if both sides are NULL
    if(str0.data == NULL || str1.data == NULL)
        return str0.data == str1.data;

    for(u32 i = 0; i < str0.len; i++){
        if(str0.data[i] != str1.data[i])
            return false;
    }
    return true;
}
/// Allocates a new string holding the characters of `s` in reverse order.
/// @return zero-terminated string that the caller has to free();
///         `s` itself (no allocation) when s.data is NULL or s.len is 0;
///         a NULL string of length 0 when the allocation fails
str str_reverse(str s){
    if(s.data == NULL || s.len == 0)
        return s;

    // one extra byte so that the result really is zero-terminated
    char* reversed = (char*)malloc(s.len + 1);
    if(reversed == NULL)
        return str_construct(NULL, 0, false);

    for(u32 i = 0; i < s.len; i++)
        reversed[i] = s.data[s.len - i - 1];
    reversed[s.len] = '\0';
    return str_construct(reversed, s.len, true);
}

30
src/string/str.h Normal file
View File

@ -0,0 +1,30 @@
#pragma once
#include "../std.h"
// opens the C linkage block that is closed at the end of this header
#if __cplusplus
extern "C" {
#endif

/// String described by a pointer and an explicit length.
typedef struct str {
    char* data; // may be NULL (see str_null)
    u32 len; // number of characters, not counting a terminator
    bool isZeroTerminated; // true when data[len] is '\0'
} str;
/// Builds a str value from a data pointer, a length and the zero-termination flag.
/// The data is referenced, not copied.
#define str_construct(DATA, LEN, ZERO_TERMINATED) ((str){ .data = DATA, .len = LEN, .isZeroTerminated = ZERO_TERMINATED })
/// String with no data: NULL pointer, zero length.
static const str str_null = str_construct(NULL, 0, 0);
/// copies str content to new char pointer value (adding '\0' at the end)
char* str_extractcstr(str str);
/// copies src content to new string and adds \0 at the end
str str_copy(str src);
/// compares two strings, NullPtr-friendly
bool str_compare(str str0, str str1);
/// allocates new string which is reversed variant of <s>
str str_reverse(str s);
#if __cplusplus
}
#endif