From 1cae800a669d186475e65eb9a1826ecf9a1b31c1 Mon Sep 17 00:00:00 2001
From: Timerix
Date: Wed, 20 Nov 2024 07:41:26 +0500
Subject: [PATCH] List and Compiler

---
Review notes (this section is ignored when the patch is applied):
 * line structure of the patch is restored, hunk sizes and diffstat are recalculated
 * index hashes are the ones from the original commit
 * std.h: isAlphabeticalL/isAlphabeticalR/isDigit check the real character range
 * compiler.c: _Compiler_setError checks the formatted buffer, not the old message
 * compiler.c: multi-line comments can be closed, finished comments are stored
 * compiler.c: lexer leaves CompilerState_Initial before reading characters
 * compiler.c: Compiler_free releases error_message
 * List.h: List_alloc remembers allocated capacity
 * cstr.c: vsprintf_malloc checks malloc result

 src/collections/Array.h |  20 +++++
 src/collections/List.h  |  43 ++++++++++
 src/compiler/compiler.c | 198 ++++++++++++++++++++++++++++++++++++++++
 src/compiler/compiler.h |  41 ++++++++++
 src/compiler/token.h    |  21 +++++
 src/cstr.c              |  54 +++++++++++++
 src/main.c              | 121 ++++++++++++++--------------
 src/std.h               |   6 ++
 8 files changed, 442 insertions(+), 62 deletions(-)
 create mode 100644 src/collections/Array.h
 create mode 100644 src/collections/List.h
 create mode 100644 src/compiler/compiler.c
 create mode 100644 src/compiler/compiler.h
 create mode 100644 src/compiler/token.h
 create mode 100644 src/cstr.c

diff --git a/src/collections/Array.h b/src/collections/Array.h
new file mode 100644
index 0000000..24a4255
--- /dev/null
+++ b/src/collections/Array.h
@@ -0,0 +1,20 @@
+#pragma once
+#include "../std.h"
+
+#define Array_declare(T)\
+    typedef struct Array_##T {\
+        T* data;\
+        u32 len;\
+    } Array_##T;\
+    \
+    static inline Array_##T Array_##T##_construct(T* data_ptr, u32 len) {\
+        return (Array_##T){ .data = data_ptr, .len = len };\
+    }\
+    \
+    static inline Array_##T Array_##T##_alloc(u32 len){\
+        return Array_##T##_construct(malloc(len * sizeof(T)), len);\
+    }\
+    static inline void Array_##T##_realloc(Array_##T* ptr, u32 new_len){\
+        ptr->data = realloc(ptr->data, new_len * sizeof(T));\
+        ptr->len = new_len;\
+    }
diff --git a/src/collections/List.h b/src/collections/List.h
new file mode 100644
index 0000000..24342ca
--- /dev/null
+++ b/src/collections/List.h
@@ -0,0 +1,43 @@
+#pragma once
+#include "../std.h"
+
+#define List_declare(T)\
+    typedef struct List_##T {\
+        T* data;\
+        u32 len;\
+        u32 max_len;\
+    } List_##T;\
+    \
+    static inline List_##T List_##T##_construct(T* data_ptr, u32 len, u32 max_len) {\
+        return (List_##T){ .data = data_ptr, .len = len, .max_len = max_len };\
+    }\
+    \
+    static inline List_##T List_##T##_alloc(u32 len){\
+        T* data_ptr = len > 0 ? malloc(len * sizeof(T)) : NULL;\
+        /* capacity is remembered only if the buffer really exists */\
+        return List_##T##_construct(data_ptr, 0, data_ptr != NULL ? len : 0);\
+    }\
+    \
+    void List_##T##_push(List_##T* ptr, T value);
+
+
+#define List_define(T)\
+    void List_##T##_push(List_##T* ptr, T value){\
+        u32 max_len = ptr->max_len;\
+        if(ptr->len == max_len){\
+            max_len += max_len / 2; /* grow by 1.5x without floating point math */\
+            max_len += __List_padding_in_sizeof_T(T);\
+            /* branchless version of max(max_len, __List_min_size) */\
+            max_len += (max_len < __List_min_size) * (__List_min_size - max_len);\
+            ptr->data = realloc(ptr->data, max_len * sizeof(T));\
+            ptr->max_len = max_len;\
+        }\
+        ptr->data[ptr->len++] = value;\
+    }
+
+#define __List_min_size 16
+
+// sizeof(T) == 1 - padding is 7 of sizeof(T)
+// sizeof(T) == 2 - padding is 3 of sizeof(T)
+// sizeof(T) == 4 - padding is 1 of sizeof(T)
+#define __List_padding_in_sizeof_T(T) ((8 - sizeof(T) % 8) / sizeof(T) )
diff --git a/src/compiler/compiler.c b/src/compiler/compiler.c
new file mode 100644
index 0000000..920ed62
--- /dev/null
+++ b/src/compiler/compiler.c
@@ -0,0 +1,198 @@
+#include "compiler.h"
+
+List_define(Token);
+
+/// @brief stores formatted error message in cmp->error_message and sets cmp->state to CompilerState_Error
+/// @param context name of the function which reports the error
+void _Compiler_setError(Compiler* cmp, const char* context, const char* format, ...){
+    va_list argv;
+    char position_str[32];
+    snprintf(position_str, sizeof(position_str), "[at %u:%u][", cmp->line, cmp->column);
+    char* real_format = strcat_malloc(position_str, context, "] ", format);
+    va_start(argv, format);
+    char* NULLABLE(buf) = vsprintf_malloc(512, real_format, argv);
+    va_end(argv);
+    free(real_format);
+    // formatting failed, use fallback message
+    if(buf == NULL){
+        buf = malloc(16);
+        if(buf != NULL)
+            strcpy(buf, "SPRINTF FAILED");
+    }
+    // message of previous error is not needed anymore (free(NULL) is no-op)
+    free(cmp->error_message);
+    cmp->state = CompilerState_Error;
+    cmp->error_message = buf;
+}
+
+
+#define setError(FORMAT, ...) Compiler_setError(cmp, FORMAT, ##__VA_ARGS__);
+
+#define returnError(FORMAT, ...) {\
+    setError(FORMAT, ##__VA_ARGS__);\
+    return false;\
+}
+
+#define returnErrorIf(STATEMENT, FORMAT, ...) if(STATEMENT) returnError(FORMAT, ##__VA_ARGS__)
+
+#define returnErrorIf_auto(STATEMENT) returnErrorIf(STATEMENT, #STATEMENT)
+
+#define Error_UnexpectedCharacter(C) "unexpected character '%c'", C
+
+#define Error_endOfFile() "unexpected end of file"
+
+void Compiler_init(Compiler* cmp){
+    memset(cmp, 0, sizeof(Compiler));
+    cmp->state = CompilerState_Initial;
+    cmp->tokens = List_Token_alloc(1024 * 8);
+}
+
+void Compiler_free(Compiler* cmp){
+    // free(NULL) is no-op, so pointers don't have to be checked
+    free(cmp->tokens.data);
+    cmp->tokens = List_Token_construct(NULL, 0, 0);
+    free(cmp->error_message);
+    cmp->error_message = NULL;
+}
+
+static void _recognizeLexema(Compiler* cmp, size_t pos, char c){
+    switch(c){
+        // skip blank characters
+        case ' ': case '\t': case '\r': case '\n':
+            break;
+        // try read comment
+        case '/':
+            cmp->state = CompilerState_ReadingComment;
+            break;
+        // try read label
+        case '.':
+            cmp->state = CompilerState_ReadingLabel;
+            break;
+        default:
+            // try read instruction
+            if(isAlphabeticalL(c) || isAlphabeticalR(c)){
+                cmp->state = CompilerState_ReadingInstruction;
+                break;
+            }
+            else setError(Error_UnexpectedCharacter(c));
+    }
+}
+
+/// @brief saves finished comment token and returns lexer to lexema recognition
+static void _finishComment(Compiler* cmp, Token* tok){
+    List_Token_push(&cmp->tokens, *tok);
+    *tok = Token_construct(TokenType_Unset, 0, 0);
+    cmp->state = CompilerState_LexemaRecognition;
+}
+
+/// @brief handles one character in CompilerState_ReadingComment
+/// @param pos index of c in cmp->code (always > 0, because '/' precedes a comment)
+/// @param tok comment token being read, TokenType_Unset if only leading '/' has been read
+static void _readCommentChar(Compiler* cmp, size_t pos, char c, Token* tok){
+    // begin comment
+    if(tok->type == TokenType_Unset){
+        if(c == '*')
+            tok->type = TokenType_MultiLineComment;
+        else if(c == '/')
+            tok->type = TokenType_SingleLineComment;
+        else {
+            setError(Error_UnexpectedCharacter(c));
+            return;
+        }
+        // tok->begin points to the second character of "//" or "/*"
+        tok->begin = pos;
+        tok->length = 0;
+    }
+    // end multi-line comment (the '*' of "/*" can't be reused in "*/")
+    else if(c == '/' && tok->type == TokenType_MultiLineComment
+            && cmp->code[pos - 1] == '*' && pos - 1 > tok->begin){
+        tok->length = pos - tok->begin - 2;
+        _finishComment(cmp, tok);
+        return;
+    }
+    // end single-line comment
+    else if(c == '\n' && tok->type == TokenType_SingleLineComment){
+        tok->length = pos - tok->begin - 1;
+        // remove trailing '\r'
+        if(cmp->code[pos - 1] == '\r')
+            tok->length--;
+        _finishComment(cmp, tok);
+        return;
+    }
+
+    // file ended before the comment was closed
+    if(pos == cmp->code_len - 1){
+        // single-line comment may be ended by the end of file
+        if(tok->type == TokenType_SingleLineComment){
+            tok->length = pos - tok->begin;
+            _finishComment(cmp, tok);
+        }
+        // error on unclosed multiline comment
+        else setError(Error_endOfFile());
+    }
+    // do nothing on comment content
+}
+
+
+
+bool _Compiler_lex(Compiler* cmp){
+    cmp->state = CompilerState_LexemaRecognition;
+    cmp->line = 1;
+    cmp->column = 1;
+    Token tok = Token_construct(TokenType_Unset, 0, 0);
+    for(size_t pos = 0; pos < cmp->code_len; pos++){
+        char c = cmp->code[pos];
+
+        switch(cmp->state){
+            case CompilerState_Error:
+                return false;
+            case CompilerState_LexemaRecognition:
+                _recognizeLexema(cmp, pos, c);
+                break;
+            case CompilerState_ReadingComment:
+                _readCommentChar(cmp, pos, c, &tok);
+                break;
+            default:
+                returnError("Unexpected compiler state %i", cmp->state);
+        }
+
+        if(c == '\n'){
+            cmp->column = 0;
+            cmp->line++;
+        }
+        cmp->column++;
+    }
+
+    // the file ended right after '/' which had to begin a comment
+    if(cmp->state == CompilerState_ReadingComment)
+        returnError(Error_endOfFile());
+
+    return cmp->state != CompilerState_Error;
+}
+
+bool _Compiler_parse(Compiler* cmp){
+    return true;
+}
+bool _Compiler_compile(Compiler* cmp){
+    return true;
+}
+
+bool Compiler_compileTasm(Compiler* cmp, const char* restrict code, size_t code_len, char* restrict out_buffer, size_t out_len){
+    returnErrorIf_auto(code == NULL);
+    returnErrorIf_auto(code_len == 0);
+    returnErrorIf_auto(out_buffer == NULL);
+    returnErrorIf_auto(out_len == 0);
+    cmp->code = code;
+    cmp->code_len = code_len;
+    cmp->out_buffer = out_buffer;
+    cmp->out_len = out_len;
+
+    if(!_Compiler_lex(cmp))
+        return false;
+    if(!_Compiler_parse(cmp))
+        return false;
+    if(!_Compiler_compile(cmp))
+        return false;
+
+    return true;
+}
diff --git a/src/compiler/compiler.h b/src/compiler/compiler.h
new file mode 100644
index 0000000..f83b689
--- /dev/null
+++ b/src/compiler/compiler.h
@@ -0,0 +1,41 @@
+#pragma once
+#include "../std.h"
+#include "../collections/List.h"
+#include "token.h"
+
+List_declare(Token);
+
+typedef enum CompilerState {
+    CompilerState_Initial,
+    CompilerState_LexemaRecognition,
+    CompilerState_ReadingComment,
+    CompilerState_ReadingLabel,
+    CompilerState_ReadingInstruction,
+    CompilerState_ReadingArguments,
+    CompilerState_ReadingData,
+    CompilerState_Error,
+    CompilerState_Success
+} CompilerState;
+
+typedef struct Compiler {
+    const char* code;
+    size_t code_len;
+    char* out_buffer;
+    size_t out_len;
+
+    u32 line; // > 0 if code parsing started
+    u32 column; // > 0 if code parsing started
+    char* NULLABLE(error_message);
+    CompilerState state;
+    List_Token tokens;
+} Compiler;
+
+void Compiler_init(Compiler* cmp);
+void Compiler_free(Compiler* cmp);
+
+/// @brief compile assembly language code to machine code
+/// @return true if no errors, false if any error occurred (check cmp->error_message)
+bool Compiler_compileTasm(Compiler* cmp, const char* restrict code, size_t code_len, char* restrict out_buffer, size_t out_len);
+
+#define Compiler_setError(cmp, format, ...) _Compiler_setError(cmp, __func__, format ,##__VA_ARGS__)
+void _Compiler_setError(Compiler* cmp, const char* context, const char* format, ...) __attribute__((__format__(__printf__, 3, 4)));
diff --git a/src/compiler/token.h b/src/compiler/token.h
new file mode 100644
index 0000000..36ae24b
--- /dev/null
+++ b/src/compiler/token.h
@@ -0,0 +1,21 @@
+#pragma once
+#include "../std.h"
+
+typedef enum TokenType {
+    TokenType_Unset,
+    TokenType_SingleLineComment,
+    TokenType_MultiLineComment,
+    TokenType_Label,
+    TokenType_Instruction,
+    TokenType_Argument,
+    TokenType_Data,
+    /* TokenType must occupy 4 bits: values above 7 don't fit if the compiler makes the bit-field signed */
+} TokenType;
+
+typedef struct Token {
+    u32 begin; // some index in Compiler->code
+    u32 length : 28; // length in characters (28 bits)
+    TokenType type : 4; // type of token (4 bits)
+} Token;
+
+#define Token_construct(TYPE, BEGIN, LENGTH) ((Token){ .type = TYPE, .begin = BEGIN, .length = LENGTH })
diff --git a/src/cstr.c b/src/cstr.c
new file mode 100644
index 0000000..af6aac1
--- /dev/null
+++ b/src/cstr.c
@@ -0,0 +1,54 @@
+#include "std.h"
+
+char* _strcat_malloc(size_t n, cstr str0, ...){
+    va_list argv;
+    va_start(argv, str0);
+    char* heap_ptr = _vstrcat_malloc(n, str0, argv);
+    va_end(argv);
+    return heap_ptr;
+}
+
+char* _vstrcat_malloc(size_t n, cstr str0, va_list argv){
+    size_t str0_len = strlen(str0);
+    size_t total_len = str0_len;
+    cstr* const parts = malloc(sizeof(cstr) * n);
+    size_t* const part_lengths = malloc(sizeof(size_t) * n);
+    for(size_t i = 0; i < n; i++){
+        cstr part = va_arg(argv, cstr);
+        size_t length = strlen(part);
+        parts[i] = part;
+        part_lengths[i] = length;
+        total_len += length;
+    }
+    char* const buf = malloc(total_len + 1);
+    memcpy(buf, str0, str0_len);
+    char* walking_ptr = buf + str0_len;
+    for(size_t i = 0; i < n; i++){
+        memcpy(walking_ptr, parts[i], part_lengths[i]);
+        walking_ptr += part_lengths[i];
+    }
+    buf[total_len] = '\0';
+    free(parts);
+    free(part_lengths);
+    return buf;
+}
+
+char* NULLABLE(sprintf_malloc)(size_t buffer_size, cstr format, ...){
+    va_list argv;
+    va_start(argv, format);
+    char* NULLABLE(heap_ptr) = vsprintf_malloc(buffer_size, format, argv);
+    va_end(argv);
+    return heap_ptr;
+}
+
+char* NULLABLE(vsprintf_malloc)(size_t buffer_size, cstr format, va_list argv){
+    char* buf = malloc(buffer_size);
+    if(buf == NULL)
+        return NULL;
+    int r = vsprintf_s(buf, buffer_size, format, argv);
+    if(r < 0){
+        free(buf);
+        return NULL;
+    }
+    return buf;
+}
diff --git a/src/main.c b/src/main.c
index 47adf7d..741f790 100644
--- a/src/main.c
+++ b/src/main.c
@@ -1,17 +1,34 @@
 #include "VM/VM.h"
 #include "instructions/instructions.h"
+#include "collections/List.h"
+
+List_declare(cstr);
+List_define(cstr);
 
 #define arg_is(STR) (strcmp(argv[argi], STR) == 0)
 
-i32 main(const i32 argc, const char** argv){
-    const char* NULLABLE(filename) = NULL;
+i32 compileSources(cstr out_file, List_cstr* sources);
+i32 bootFromImage(cstr image_file);
+
+i32 main(const i32 argc, cstr* argv){
+    if(argc < 2){
+        printfe("ERROR: no arguments provided. Use --help to know more.\n");
+        return 1;
+    }
+
+    bool boot = false;
+    cstr NULLABLE(image_file) = NULL;
+    bool compile = false;
+    cstr NULLABLE(out_file) = NULL;
+    List_cstr NULLABLE(sources) = List_cstr_construct(NULL, 0, 0);
 
     for(i32 argi = 1; argi < argc; argi++){
         if(arg_is("-h") || arg_is("--help")){
             printf(
-                "-h, --help Show this message\n"
-                "-op, --opcodes Shows list of all instructions.\n"
-                "-i, --image [FILE] Boot VM using image file\n"
+                "-h, --help Show this message.\n"
+                "-op, --opcodes Show list of all instructions.\n"
+                "-i, --image [FILE] Boot VM using image file.\n"
+                "-c, --compile [OUT_FILE] [SOURCES...] Compile assembly source files to machine code.\n"
             );
             return 0;
         }
@@ -25,11 +42,32 @@ i32 main(const i32 argc, const char** argv){
             return 0;
         }
         else if(arg_is("-i") || arg_is("--image")){
+            if(boot){
+                printfe("--image flag is set already\n");
+                return 1;
+            }
             if(++argi >= argc){
                 printfe("ERROR: no image file specified\n");
                 return 1;
             }
-            filename = argv[argi];
+            image_file = argv[argi];
+            boot = true;
+        }
+        else if(arg_is("-c") || arg_is("--compile")){
+            if(compile){
+                printfe("--compile flag is set already\n");
+                return 1;
+            }
+            if(++argi >= argc){
+                printfe("ERROR: no output file specified\n");
+                return 1;
+            }
+            out_file = argv[argi];
+            sources = List_cstr_alloc(argc);
+            compile = true;
+        }
+        else if(compile){
+            List_cstr_push(&sources, argv[argi]);
         }
         else {
             printfe("ERROR: unknown argument '%s'\n", argv[argi]);
@@ -37,14 +75,21 @@ i32 main(const i32 argc, const char** argv){
         }
     }
 
-    if(filename == NULL){
-        printfe("ERROR: no arguments provided. Use --help to know more.\n");
-        return 1;
+    i32 exit_code = 0;
+    if(compile){
+        exit_code = compileSources(out_file, &sources);
+    }
+    if(exit_code == 0 && boot){
+        exit_code = bootFromImage(image_file);
     }
 
-    FILE* file = fopen(filename, "rb");
+    return exit_code;
+}
+
+i32 bootFromImage(cstr image_file){
+    FILE* file = fopen(image_file, "rb");
     if(file == NULL){
-        printfe("ERROR: can't open file '%s'\n", filename);
+        printfe("ERROR: can't open file '%s'\n", image_file);
         return 1;
     }
 
@@ -55,7 +100,7 @@ i32 main(const i32 argc, const char** argv){
     size_t bytes_read = fread(vm_memory, 1, buffer_size, file);
     fclose(file);
     if(bytes_read == (size_t)EOF){
-        printfe("ERROR: can't read file '%s'\n", filename);
+        printfe("ERROR: can't read file '%s'\n", image_file);
         free(vm_memory);
         return 1;
     }
@@ -83,54 +128,6 @@ i32 main(const i32 argc, const char** argv){
     return exit_code;
 }
 
-
-char* _strcat_malloc(size_t n, const char* str0, ...){
-    va_list argv;
-    va_start(argv, str0);
-    char* heap_ptr = _vstrcat_malloc(n, str0, argv);
-    va_end(argv);
-    return heap_ptr;
-}
-
-char* _vstrcat_malloc(size_t n, const char* str0, va_list argv){
-    size_t str0_len = strlen(str0);
-    size_t total_len = str0_len;
-    const char** const parts = malloc(sizeof(const char*) * n);
-    size_t* const part_lengths = malloc(sizeof(size_t) * n);
-    for(size_t i = 0; i < n; i++){
-        const char* part = va_arg(argv, const char*);
-        size_t length = strlen(part);
-        parts[i] = part;
-        part_lengths[i] = length;
-        total_len += length;
-    }
-    char* const buf = malloc(total_len + 1);
-    memcpy(buf, str0, str0_len);
-    char* walking_ptr = buf + str0_len;
-    for(size_t i = 0; i < n; i++){
-        memcpy(walking_ptr, parts[i], part_lengths[i]);
-        walking_ptr += part_lengths[i];
-    }
-    buf[total_len] = '\0';
-    free(parts);
-    free(part_lengths);
-    return buf;
-}
-
-char* NULLABLE(sprintf_malloc)(size_t buffer_size, const char* format, ...){
-    va_list argv;
-    va_start(argv, format);
-    char* NULLABLE(heap_ptr) = vsprintf_malloc(buffer_size, format, argv);
-    va_end(argv);
-    return heap_ptr;
-}
-
-char* NULLABLE(vsprintf_malloc)(size_t buffer_size, const char* format, va_list argv){
-    char* buf = malloc(buffer_size);
-    int r = vsprintf_s(buf, buffer_size, format, argv);
-    if(r < 0){
-        free(buf);
-        return NULL;
-    }
-    return buf;
+i32 compileSources(cstr out_file, List_cstr* sources){
+    return -1;
 }
diff --git a/src/std.h b/src/std.h
index 9f30d40..29313a1 100644
--- a/src/std.h
+++ b/src/std.h
@@ -24,6 +24,8 @@ typedef u8 bool;
 #define true 1
 #define false 0
 
+typedef const char* cstr;
+
 #define __count_args( \
  a0, a1, a2, a3, a4, a5, a6, a7 , a8, a9, a10,a11,a12,a13,a14,a15, \
  a16,a17,a18,a19,a20,a21,a22,a23, a24,a25,a26,a27,a28,a29,a30,a31, \
@@ -49,3 +51,7 @@ char* _vstrcat_malloc(size_t n, const char* str0, va_list argv);
 
 char* NULLABLE(sprintf_malloc)(size_t buffer_size, const char* format, ...) __attribute__((__format__(__printf__, 2, 3)));
 char* NULLABLE(vsprintf_malloc)(size_t buffer_size, const char* format, va_list argv);
+
+static inline bool isAlphabeticalL(char c) { return 'a' <= c && c <= 'z'; }
+static inline bool isAlphabeticalR(char c) { return 'A' <= c && c <= 'Z'; }
+static inline bool isDigit(char c) { return '0' <= c && c <= '9'; }