From 2faff9198105a0c0f46b0c6dabfef34c627bad05 Mon Sep 17 00:00:00 2001 From: Timerix Date: Mon, 3 Feb 2025 21:14:02 +0500 Subject: [PATCH] Parser_parseOperation --- src/collections/Array.h | 26 ++++---- src/collections/HashMap.h | 7 ++- src/compiler/AST.c | 21 +++++-- src/compiler/AST.h | 24 ++++++-- src/compiler/Compiler.c | 6 +- src/compiler/Parser.c | 106 ++++++++++++++++++++++++++++---- src/compiler/Token.c | 5 +- src/instructions/instructions.c | 28 +++++++-- src/instructions/instructions.h | 1 + 9 files changed, 179 insertions(+), 45 deletions(-) diff --git a/src/collections/Array.h b/src/collections/Array.h index 40aae36..28a55e1 100644 --- a/src/collections/Array.h +++ b/src/collections/Array.h @@ -4,21 +4,21 @@ #define Array_construct(T, DATA, LEN) ((Array_##T){ .data = DATA, .len = LEN }) /// creates Array_##T from a const array -#define ARRAY(T, A...) Array_construct(T, ((u32[])A), ARRAY_SIZE(((u32[])A))) +#define ARRAY(T, A...) Array_construct(T, ((T[])A), ARRAY_SIZE(((T[])A))) #define Array_declare(T)\ - typedef struct Array_##T {\ - T* data;\ - u32 len;\ - } Array_##T;\ - \ - static inline Array_##T Array_##T##_alloc(u32 len){\ - return Array_construct(T, (T*)malloc(len * sizeof(T)), len);\ - }\ - static inline void Array_##T##_realloc(Array_##T* ptr, u32 new_len){\ - ptr->data = (T*)realloc(ptr->data, new_len * sizeof(T));\ - ptr->len = new_len;\ - } +typedef struct Array_##T {\ + T* data;\ + u32 len;\ +} Array_##T;\ +\ +static inline Array_##T Array_##T##_alloc(u32 len){\ + return Array_construct(T, (T*)malloc(len * sizeof(T)), len);\ +}\ +static inline void Array_##T##_realloc(Array_##T* ptr, u32 new_len){\ + ptr->data = (T*)realloc(ptr->data, new_len * sizeof(T));\ + ptr->len = new_len;\ +} Array_declare(u8) Array_declare(u32) diff --git a/src/collections/HashMap.h b/src/collections/HashMap.h index 3adb61b..2d13ec3 100644 --- a/src/collections/HashMap.h +++ b/src/collections/HashMap.h @@ -2,6 +2,7 @@ #include "../std.h" #include "../string/str.h" #include "Array.h" +#include "List.h" //TODO: sorting of bucket and binary search //TODO: delayed deletion @@ -34,7 +35,7 @@ typedef struct HashMap_##T {\ \ void HashMap_##T##_alloc(HashMap_##T* ptr);\ void HashMap_##T##_free(HashMap_##T* ptr);\ -NULLABLE(T*) HashMap_##T##_tryGetPtr(HashMap_##T* ptr, str key);\ +T* NULLABLE(HashMap_##T##_tryGetPtr)(HashMap_##T* ptr, str key);\ bool HashMap_##T##_tryPush(HashMap_##T* ptr, str key, T value);\ bool HashMap_##T##_tryDelete(HashMap_##T* ptr, str key);\ @@ -75,7 +76,7 @@ void HashMap_##T##_free(HashMap_##T* ptr){\ free(ptr->table);\ }\ \ -NULLABLE(T*) HashMap_##T##_tryGetPtr(HashMap_##T* ptr, str key){\ +T* NULLABLE(HashMap_##T##_tryGetPtr)(HashMap_##T* ptr, str key){\ u32 hash = __HashMap_HASH_FUNC(key);\ HashMapBucket_##T* bu = &ptr->table[hash % ptr->height];\ for(u32 i = 0; i < bu->kvs.len; i++){\ @@ -98,7 +99,7 @@ bool HashMap_##T##_tryPush(HashMap_##T* ptr, str key, T value){\ \ if(bu->kvs.len >= __HashMapBucket_MAX_LEN){\ u32 height_expanded_n = ptr->height_n + 1;\ - if(height_expanded_n >= __HashMap_u32_heights.len){\ + if(height_expanded_n >= __HashMap_##T##_heights.len){\ printf("ERROR: HashMap_" #T " IS FULL\n");\ return false;\ }\ diff --git a/src/compiler/AST.c b/src/compiler/AST.c index b17371c..ba6ab4d 100644 --- a/src/compiler/AST.c +++ b/src/compiler/AST.c @@ -9,17 +9,29 @@ static str _ArgumentType_str[] = { STR("Unset"), STR("Register"), STR("ConstValue"), - STR("DataName"), - STR("NamedDataPointer"), - STR("NamedDataSize"), + STR("VarDataName"), + STR("ConstDataPointer"), + STR("ConstDataSize"), }; str ArgumentType_toString(ArgumentType t){ if(t >= ARRAY_SIZE(_ArgumentType_str)) - return STR("!!INDEX_ERROR!!"); + return STR("!!ArgumentType INDEX_ERROR!!"); return _ArgumentType_str[t]; } +RegisterCode RegisterCode_parse(str r){ + if(str_equals(r, STR("ax"))) + return RegisterCode_ax; + if(str_equals(r, STR("bx"))) + return RegisterCode_bx; + if(str_equals(r, STR("cx"))) + return RegisterCode_cx; + if(str_equals(r, STR("dx"))) + return RegisterCode_dx; + return RegisterCode_Unset; +} + void Section_init(Section* sec, str name){ sec->name = name; @@ -28,7 +40,6 @@ void Section_init(Section* sec, str name){ } void Section_free(Section* sec){ - free(sec->name.data); free(sec->data.data); free(sec->code.data); } diff --git a/src/compiler/AST.h b/src/compiler/AST.h index 651d961..769c4d7 100644 --- a/src/compiler/AST.h +++ b/src/compiler/AST.h @@ -8,17 +8,31 @@ typedef enum ArgumentType { ArgumentType_Unset, ArgumentType_Register, ArgumentType_ConstValue, - ArgumentType_DataName, - ArgumentType_NamedDataPointer, - ArgumentType_NamedDataSize, + ArgumentType_VarDataName, + ArgumentType_ConstDataPointer, + ArgumentType_ConstDataSize, } ArgumentType; str ArgumentType_toString(ArgumentType t); +typedef enum RegisterCode { + RegisterCode_Unset, + RegisterCode_ax, + RegisterCode_bx, + RegisterCode_cx, + RegisterCode_dx +} RegisterCode; + +RegisterCode RegisterCode_parse(str register_name); typedef struct Argument { ArgumentType type; - u32 value; + union { + i64 i; + f64 f; + str data_name; + RegisterCode register_code; + } value; } Argument; List_declare(Argument); @@ -26,7 +40,7 @@ List_declare(Argument); typedef struct Operation { List_Argument args; - Opcode op; + Opcode opcode; } Operation; List_declare(Operation); diff --git a/src/compiler/Compiler.c b/src/compiler/Compiler.c index f2e4f0d..69c4179 100644 --- a/src/compiler/Compiler.c +++ b/src/compiler/Compiler.c @@ -116,9 +116,13 @@ bool Compiler_compile(Compiler* cmp, cstr source_file_name, cstr out_file_name, char* tokstr = malloc(4096); strncpy(tokstr, cmp->code.data + t.begin, t.length); tokstr[t.length] = 0; + char* tokstr_stripped = tokstr; + while(*tokstr_stripped == '\r' || *tokstr_stripped == '\n'){ + tokstr_stripped++; + } printf("[l:%3u, c:%3u] %s '%s'\n", pos.line, pos.column, - TokenType_toString(t.type).data, tokstr); + TokenType_toString(t.type).data, tokstr_stripped); free(tokstr); } } diff --git a/src/compiler/Parser.c b/src/compiler/Parser.c index f479579..7a74bfd 100644 --- a/src/compiler/Parser.c +++ b/src/compiler/Parser.c @@ -17,6 +17,12 @@ Compiler_setError(cmp, "unexpected token '%c'", cmp->code.data[cmp->pos]);\ } +#define setError_unexpectedInstruction(T) {\ + str tok_str = str_copy(Compiler_constructTokenStr(cmp, T));\ + cmp->pos = T.begin;\ + Compiler_setError(cmp, "unexpected instruction '%s'", tok_str.data);\ + free(tok_str.data);\ +} #define Error_TokenUnset "token of undefined type" #define Error_BitSize "invalid size in bits" @@ -28,7 +34,7 @@ static void List_u8_pushBytes(List_u8* l, void* value, u32 startIndex, u32 count } } -static inline bool isVarSizeBits(u32 B) { return (B == 8 && B == 16 && B == 32 && B == 64); } +static inline bool isVarSizeBits(u32 B) { return (B == 8 || B == 16 || B == 32 || B == 64); } static NULLABLE(str) resolveEscapeSequences(Compiler* cmp, str src){ StringBuilder sb = StringBuilder_alloc(src.len); @@ -85,28 +91,41 @@ static void parseDataDefinition(Compiler* cmp, str instr_name, DataDefinition* d ddf->element_size = _element_size_bits / 8; Token tok = cmp->tokens.data[++cmp->tok_i]; + if(tok.type != TokenType_Name){ + setError_unexpectedToken(tok); + return; + } + str tok_str = Compiler_constructTokenStr(cmp, tok); str processed_str = str_null; ddf->name = tok_str; while(++cmp->tok_i < cmp->tokens.len){ + tok = cmp->tokens.data[cmp->tok_i]; switch(tok.type){ - case TokenType_Unset: - setError(Error_TokenUnset); - return; case TokenType_SingleLineComment: case TokenType_MultiLineComment: // skip comments break; + + case TokenType_OperationEnd: + return; + case TokenType_Unset: + setError(Error_TokenUnset); + return; + default: + setError_unexpectedToken(tok); + return; + case TokenType_Number: tok_str = Compiler_constructTokenStr(cmp, tok); processed_str = str_copy(tok_str); if(str_seekChar(tok_str, '.', 0) != -1){ - f64 f = atof(tok_str.data); + f64 f = atof(processed_str.data); List_u8_pushBytes(&ddf->data, &f, 8 - ddf->element_size, ddf->element_size); } else { - i64 i = atoll(tok_str.data); + i64 i = atoll(processed_str.data); List_u8_pushBytes(&ddf->data, &i, 8 - ddf->element_size, ddf->element_size); } free(processed_str.data); @@ -132,18 +151,83 @@ static void parseDataDefinition(Compiler* cmp, str instr_name, DataDefinition* d List_u8_pushBytes(&ddf->data, processed_str.data, 0, processed_str.len); free(processed_str.data); break; - case TokenType_OperationEnd: - return; - default: - setError_unexpectedToken(tok); - return; } } } static void parseOperation(Compiler* cmp, str instr_name, Operation* operPtr){ + Token tok = cmp->tokens.data[cmp->tok_i]; + const Instruction* instr = Instruction_getByName(instr_name); + if(instr == NULL){ + setError_unexpectedInstruction(tok); + return; + } + operPtr->opcode = instr->opcode; + Argument arg = (Argument){ .type = ArgumentType_Unset, .value.i = 0 }; + str tok_str = str_null; + str processed_str = str_null; + while(++cmp->tok_i < cmp->tokens.len){ + tok = cmp->tokens.data[cmp->tok_i]; + switch(tok.type){ + case TokenType_SingleLineComment: + case TokenType_MultiLineComment: + // skip comments + break; + + case TokenType_OperationEnd: + return; + case TokenType_Unset: + setError(Error_TokenUnset); + return; + default: + setError_unexpectedToken(tok); + return; + + case TokenType_Number: + arg.type = ArgumentType_ConstValue; + tok_str = Compiler_constructTokenStr(cmp, tok); + processed_str = str_copy(tok_str); + if(str_seekChar(tok_str, '.', 0) != -1){ + arg.value.f = atof(processed_str.data); + } + else { + arg.value.i = atoll(processed_str.data); + } + free(processed_str.data); + List_Argument_push(&operPtr->args, arg); + break; + case TokenType_Name: + tok_str = Compiler_constructTokenStr(cmp, tok); + arg.value.register_code = RegisterCode_parse(tok_str); + if(arg.value.register_code != RegisterCode_Unset){ + arg.type = ArgumentType_Register; + } + else { + arg.type = ArgumentType_VarDataName; + arg.value.data_name = tok_str; + } + List_Argument_push(&operPtr->args, arg); + break; + case TokenType_NamedDataPointer: + tok_str = Compiler_constructTokenStr(cmp, tok); + tok_str.data++; + tok_str.len--; + arg.type = ArgumentType_ConstDataPointer; + arg.value.data_name = tok_str; + List_Argument_push(&operPtr->args, arg); + break; + case TokenType_NamedDataSize: + tok_str = Compiler_constructTokenStr(cmp, tok); + tok_str.data++; + tok_str.len--; + arg.type = ArgumentType_ConstDataSize; + arg.value.data_name = tok_str; + List_Argument_push(&operPtr->args, arg); + break; + } + } } bool Compiler_parse(Compiler* cmp){ diff --git a/src/compiler/Token.c b/src/compiler/Token.c index fbf9b20..502262d 100644 --- a/src/compiler/Token.c +++ b/src/compiler/Token.c @@ -13,11 +13,12 @@ static str _TokenType_str[] = { STR("String"), STR("Name"), STR("NamedDataPointer"), - STR("NamedDataSize") + STR("NamedDataSize"), + STR("OperationEnd"), }; str TokenType_toString(TokenType t){ if(t >= ARRAY_SIZE(_TokenType_str)) - return STR("!!INDEX_ERROR!!"); + return STR("!!TokenType INDEX_ERROR!!"); return _TokenType_str[t]; } diff --git a/src/instructions/instructions.c b/src/instructions/instructions.c index f6ccedc..aeb790c 100644 --- a/src/instructions/instructions.c +++ b/src/instructions/instructions.c @@ -1,4 +1,5 @@ #include "instructions.h" +#include "../collections/HashMap.h" i32 NOP_impl(VM* vm); i32 PUSH_impl(VM* vm); @@ -13,8 +14,8 @@ i32 EXIT_impl(VM* vm); i32 JMP_impl(VM* vm); i32 CALL_impl(VM* vm); - -const Instruction instructions[] = { +Array_declare(Instruction); +static const Array_Instruction instructions_array = ARRAY(Instruction, { Instruction_construct(NOP), Instruction_construct(PUSH), Instruction_construct(MOV), @@ -27,11 +28,28 @@ const Instruction instructions[] = { Instruction_construct(EXIT), // Instruction_construct(JMP), // Instruction_construct(CALL), -}; +}); const Instruction* Instruction_getByOpcode(Opcode opcode){ - if(opcode >= ARRAY_SIZE(instructions)) + if(opcode >= instructions_array.len) return NULL; - return instructions + opcode; + return instructions_array.data + opcode; +} + +HashMap_declare(Instruction); +HashMap_define(Instruction, HashMap_DESTROY_VALUE_FUNC_NULL); + +static HashMap_Instruction* instructions_map = NULL; + +const Instruction* Instruction_getByName(str name){ + if(instructions_map == NULL){ + instructions_map = malloc(sizeof(HashMap_Instruction)); + HashMap_Instruction_alloc(instructions_map); + for(u32 i = 0; i < instructions_array.len; i++){ + HashMap_Instruction_tryPush(instructions_map, instructions_array.data[i].name, instructions_array.data[i]); + } + } + + return HashMap_Instruction_tryGetPtr(instructions_map, name); } diff --git a/src/instructions/instructions.h b/src/instructions/instructions.h index 08c5000..abcf744 100644 --- a/src/instructions/instructions.h +++ b/src/instructions/instructions.h @@ -34,3 +34,4 @@ typedef struct Instruction { /// @param opcode any byte /// @return ptr to struct or NULL const Instruction* NULLABLE(Instruction_getByOpcode)(Opcode opcode); +const Instruction* NULLABLE(Instruction_getByName)(str name);