From 83e28c9022a52337faaca088e24d696bc6b111ca Mon Sep 17 00:00:00 2001 From: Timerix Date: Tue, 21 Jan 2025 01:17:30 +0500 Subject: [PATCH] started working on parser --- examples/s.tasm | 6 ++-- src/collections/List.h | 19 +++++++---- src/compiler/AST.c | 42 +++++++++++++++++++++++- src/compiler/AST.h | 37 +++++++++++++-------- src/compiler/Compiler.c | 9 +++++ src/compiler/compiler.h | 2 ++ src/compiler/compiler_internal.h | 2 ++ src/compiler/parser.c | 56 ++++++++++++++++++++++++++++++++ src/cstr.c | 40 +++++++++++++++++++++++ src/main.c | 2 +- src/std.h | 10 ++++++ 11 files changed, 201 insertions(+), 24 deletions(-) diff --git a/examples/s.tasm b/examples/s.tasm index 9c86ae8..39348a5 100644 --- a/examples/s.tasm +++ b/examples/s.tasm @@ -2,9 +2,11 @@ "hello world" program in my assembly language */ -// named data array -c8 msg "Hello, World :3\0" +.data: +// named array of 8-bit values +const8 msg "Hello, World :3\0" +.main: push ax 1; // sys_write push bx 1; // stdout push cx @msg; // address of msg data diff --git a/src/collections/List.h b/src/collections/List.h index 633f04b..ba2d3d0 100644 --- a/src/collections/List.h +++ b/src/collections/List.h @@ -16,22 +16,27 @@ return List_##T##_construct((T*)(len > 0 ? 
malloc(len * sizeof(T)) : NULL), 0, 0);\ }\ \ - void List_##T##_push(List_##T* ptr, T value); + T* List_##T##_expand(List_##T* ptr);\ + void List_##T##_push(List_##T* ptr, T value);\ #define List_define(T)\ - void List_##T##_push(List_##T* ptr, T value){\ - u32 max_len = ptr->max_len;\ - if(ptr->len == max_len){\ - max_len = max_len * 1.5;\ + T* List_##T##_expand(List_##T* ptr){\ + if(ptr->len == ptr->max_len){\ + u32 max_len = ptr->max_len * 1.5;\ max_len += __List_padding_in_sizeof_T(T);\ /* branchless version of max(max_len, __List_min_size) */\ max_len += (max_len < __List_min_size) * (__List_min_size - max_len);\ ptr->data = (T*)realloc(ptr->data, max_len * sizeof(T));\ ptr->max_len = max_len;\ }\ - ptr->data[ptr->len++] = value;\ - } + return &ptr->data[ptr->len++];\ + }\ + \ + void List_##T##_push(List_##T* ptr, T value){\ + T* empty_cell_ptr = List_##T##_expand(ptr);\ + *empty_cell_ptr = value;\ + }\ #define __List_min_size 16 diff --git a/src/compiler/AST.c b/src/compiler/AST.c index 6a7ebf3..76c1b1e 100644 --- a/src/compiler/AST.c +++ b/src/compiler/AST.c @@ -2,4 +2,44 @@ List_define(Argument); List_define(Operation); -List_define(DataDefinition); \ No newline at end of file +List_define(DataDefinition); + +static cstr _ArgumentType_str[] = { + "Unset", + "Register", + "ConstValue", + "DataName", + "NamedDataPointer", + "NamedDataSize", +}; + +cstr ArgumentType_toString(ArgumentType t){ + if(t >= ARRAY_SIZE(_ArgumentType_str)) + return "!!INDEX_ERROR!!"; + return _ArgumentType_str[t]; +} + + +void Section_init(Section* sec, char* name){ + sec->name = name; + sec->data = List_DataDefinition_alloc(256); + sec->code = List_Operation_alloc(1024); +} + +void Section_free(Section* sec){ + free(sec->name); + free(sec->data.data); + free(sec->code.data); +} + + +void AST_init(AST* ast){ + ast->sections = List_Section_alloc(32); +} + +void AST_free(AST* ast){ + for(u32 i = 0; i != ast->sections.len; i++){ + Section_free(&ast->sections.data[i]); + } + 
free(ast->sections.data); +} diff --git a/src/compiler/AST.h b/src/compiler/AST.h index 74b75cb..f3804bc 100644 --- a/src/compiler/AST.h +++ b/src/compiler/AST.h @@ -4,7 +4,7 @@ #include "../collections/List.h" typedef enum ArgumentType { - ArgumentType_NoArgument, + ArgumentType_Unset, ArgumentType_Register, ArgumentType_ConstValue, ArgumentType_DataName, @@ -12,6 +12,9 @@ typedef enum ArgumentType { ArgumentType_NamedDataSize, } ArgumentType; +cstr ArgumentType_toString(ArgumentType t); + + typedef struct Argument { ArgumentType type; u32 value; @@ -19,31 +22,39 @@ typedef struct Argument { List_declare(Argument); + typedef struct Operation { - Opcode op; List_Argument args; + Opcode op; } Operation; List_declare(Operation); + typedef struct DataDefinition { - u8 element_size; - u16 count; + u32 element_size; + u32 count; cstr name; void* data; } DataDefinition; List_declare(DataDefinition); -typedef struct AST { + +typedef struct Section { + char* name; List_DataDefinition data; - List_Operation operations; + List_Operation code; +} Section; + +List_declare(Section); + +void Section_init(Section* Section, char* name); +void Section_free(Section* Section); + +typedef struct AST { + List_Section sections; } AST; -/* -d8 name '' - -goto label -.label: -code... 
-*/ \ No newline at end of file +void AST_init(AST* ast); +void AST_free(AST* ast); diff --git a/src/compiler/Compiler.c b/src/compiler/Compiler.c index 26d9b48..853a453 100644 --- a/src/compiler/Compiler.c +++ b/src/compiler/Compiler.c @@ -6,12 +6,14 @@ void Compiler_init(Compiler* cmp){ cmp->state = CompilerState_Initial; cmp->tokens = List_Token_alloc(4096); cmp->line_lengths = List_u32_alloc(1024); + AST_init(&cmp->ast); } void Compiler_free(Compiler* cmp){ free(cmp->code); free(cmp->tokens.data); free(cmp->line_lengths.data); + AST_free(&cmp->ast); } CodePos Compiler_getLineAndColumn(Compiler* cmp, u32 pos){ @@ -54,6 +56,13 @@ void _Compiler_setError(Compiler* cmp, cstr context, cstr format, ...){ Compiler_setError(cmp, FORMAT, ##__VA_ARGS__);\ } +char* Compiler_extractTokenStr(Compiler* cmp, Token t){ + char* s = malloc(t.length + 1); + memcpy(s, cmp->code, t.length); + s[t.length] = 0; + return s; +} + static bool compileFile(Compiler* cmp, FILE* f){ returnErrorIf_auto(cmp->state != CompilerState_Parsing); cmp->state = CompilerState_Compiling; diff --git a/src/compiler/compiler.h b/src/compiler/compiler.h index cb5485b..6124102 100644 --- a/src/compiler/compiler.h +++ b/src/compiler/compiler.h @@ -22,6 +22,8 @@ typedef struct Compiler { NULLABLE(char* error_message); List_Token tokens; List_u32 line_lengths; + AST ast; + u32 tok_i; } Compiler; void Compiler_init(Compiler* cmp); diff --git a/src/compiler/compiler_internal.h b/src/compiler/compiler_internal.h index 4e79443..f3ce11c 100644 --- a/src/compiler/compiler_internal.h +++ b/src/compiler/compiler_internal.h @@ -23,5 +23,7 @@ typedef struct CodePos { /// @param pos index in code buffer CodePos Compiler_getLineAndColumn(Compiler* cmp, u32 pos); +char* Compiler_extractTokenStr(Compiler* cmp, Token t); + bool Compiler_lex(Compiler* cmp); bool Compiler_parse(Compiler* cmp); diff --git a/src/compiler/parser.c b/src/compiler/parser.c index 12b2fa6..ebf366b 100644 --- a/src/compiler/parser.c +++ 
b/src/compiler/parser.c @@ -1,12 +1,68 @@ #include "Compiler_internal.h" #define setError(FORMAT, ...) {\ + cmp->pos = cmp->tokens.data[cmp->tok_i].begin;\ Compiler_setError(cmp, FORMAT, ##__VA_ARGS__);\ } +#define setError_unexpectedToken(T) {\ + char* tok_str = Compiler_extractTokenStr(cmp, T);\ + setError("unexpected character '%s'", tok_str);\ + free(tok_str);\ +} + + +static void parseDataDefinition(Compiler* cmp, char* instr_name, DataDefinition* dataDefPtr){ + +} + + +static void parseOperation(Compiler* cmp, char* instr_name, Operation* operPtr){ + +} + bool Compiler_parse(Compiler* cmp){ returnErrorIf_auto(cmp->state != CompilerState_Lexing); cmp->state = CompilerState_Parsing; + Token tok; + Section* sec = NULL; + + while(cmp->tok_i < cmp->tokens.len){ + tok = cmp->tokens.data[cmp->tok_i]; + switch(tok.type){ + case TokenType_Unset: + returnError("token of undefined type"); + case TokenType_SingleLineComment: + case TokenType_MultiLineComment: + // skip comments + break; + case TokenType_Label: + // create new section + sec = List_Section_expand(&cmp->ast.sections); + Section_init(sec, Compiler_extractTokenStr(cmp, tok)); + break; + case TokenType_Instruction: + char* instr_name = Compiler_extractTokenStr(cmp, tok); + // data definition starts with const + if(cstr_seek(instr_name, "const", 0, 1)){ + DataDefinition* dataDefPtr = List_DataDefinition_expand(&sec->data); + parseDataDefinition(cmp, instr_name, dataDefPtr); + } + else { + Operation* operPtr = List_Operation_expand(&sec->code); + parseOperation(cmp, instr_name, operPtr); + } + break; + default: + setError_unexpectedToken(tok); + return false; + } + + if(cmp->state == CompilerState_Error) + return false; + + cmp->tok_i++; + } return true; } diff --git a/src/cstr.c b/src/cstr.c index af6aac1..37f0a36 100644 --- a/src/cstr.c +++ b/src/cstr.c @@ -50,3 +50,43 @@ char* NULLABLE(vsprintf_malloc)(size_t buffer_size, cstr format, va_list argv){ } return buf; } + +i32 cstr_seek(const char* src, const 
char* fragment, u32 startIndex, u32 seekLength){ + char sc = *src, fc = *fragment; + if(sc == 0 || fc == 0) + return -1; + u32 fr_start = startIndex; + for(u32 si = startIndex; si-startIndex < seekLength && sc != 0; si++){ + sc = src[si]; + fc = fragment[si-fr_start]; + if(fc == 0) + return fr_start; + if(sc != fc) + fr_start++; + } + return -1; +} + +i32 cstr_seekReverse(const char* src, const char* fragment, u32 startIndex, u32 seekLength){ + char sc = *src, fc = *fragment; + if(sc == 0 || fc == 0) + return -1; + i32 len = strlen(src); + if(startIndex == (u32)-1) + startIndex = len-1; + u32 fr_len = strlen(fragment); + for(u32 si = startIndex; si < (u32)-1 && si != (len - seekLength - 1); si--){ + if(si + 1 < fr_len) + return -1; + sc = src[si]; + fc = fragment[0]; + u32 fr_start = si; + for(u32 fi = 0; fc == sc ; fi++){ + if(fi == fr_len) + return fr_start; + fc = fragment[fi]; + sc = src[si--]; + } + } + return -1; +} \ No newline at end of file diff --git a/src/main.c b/src/main.c index f8dba23..8546ced 100644 --- a/src/main.c +++ b/src/main.c @@ -1,7 +1,7 @@ #include "VM/VM.h" #include "instructions/instructions.h" #include "collections/List.h" -#include "compiler/compiler.h" +#include "compiler/Compiler.h" #define arg_is(STR) (strcmp(argv[argi], STR) == 0) diff --git a/src/std.h b/src/std.h index bb63bfa..64ea391 100644 --- a/src/std.h +++ b/src/std.h @@ -57,3 +57,13 @@ char* NULLABLE(vsprintf_malloc)(size_t buffer_size, cstr format, va_list argv); static inline bool isAlphabeticalLower(char c) { return 'a' <= c && c <= 'z'; } static inline bool isAlphabeticalUpper(char c) { return 'A' <= c && c <= 'Z'; } static inline bool isDigit(char c) { return '0' <= c && c <= '9'; } + +/// @param startIndex 0 ... src length +/// @param seekLength 0 ... -1 +/// @return pos of first inclusion in or -1 if not found +i32 cstr_seek(const char* src, const char* fragment, u32 startIndex, u32 seekLength); + +/// @param startIndex -1 ... src length +/// @param seekLength 0 ... 
-1 +/// @return position of the first match of fragment in src, or -1 if not found +i32 cstr_seekReverse(const char* src, const char* fragment, u32 startIndex, u32 seekLength);