List and Compiler

This commit is contained in:
Timerix 2024-11-20 07:41:26 +05:00
parent a073d0ebd9
commit 1cae800a66
8 changed files with 409 additions and 62 deletions

20
src/collections/Array.h Normal file
View File

@ -0,0 +1,20 @@
#pragma once
#include "../std.h"
/* Array_declare(T) declares the fixed-length array type Array_T and its helpers: */
/*   Array_T_construct(data_ptr, len)  wraps an existing buffer, nothing is allocated or copied */
/*   Array_T_alloc(len)                allocates room for len elements (uninitialized); */
/*                                     on allocation failure the result has data == NULL and len == 0 */
/*   Array_T_realloc(ptr, new_len)     resizes the buffer; on failure the array is left unchanged, */
/*                                     so callers can detect failure by checking ptr->len != new_len */
#define Array_declare(T)\
typedef struct Array_##T {\
    T* data;\
    u32 len;\
} Array_##T;\
\
static inline Array_##T Array_##T##_construct(T* data_ptr, u32 len) {\
    return (Array_##T){ .data = data_ptr, .len = len };\
}\
\
static inline Array_##T Array_##T##_alloc(u32 len){\
    T* data_ptr = len > 0 ? malloc(len * sizeof(T)) : NULL;\
    /* never report a length that is not backed by memory */\
    return Array_##T##_construct(data_ptr, data_ptr != NULL ? len : 0);\
}\
static inline void Array_##T##_realloc(Array_##T* ptr, u32 new_len){\
    if(new_len == 0){\
        /* realloc(p, 0) is not portable, release the buffer explicitly */\
        free(ptr->data);\
        ptr->data = NULL;\
        ptr->len = 0;\
        return;\
    }\
    T* new_data = realloc(ptr->data, new_len * sizeof(T));\
    /* a failed realloc keeps the old block valid: do not lose it */\
    if(new_data == NULL)\
        return;\
    ptr->data = new_data;\
    ptr->len = new_len;\
}

41
src/collections/List.h Normal file
View File

@ -0,0 +1,41 @@
#pragma once
#include "../std.h"
/* List_declare(T) declares the growable list type List_T: */
/*   data     heap buffer (may be NULL while max_len == 0) */
/*   len      number of elements currently stored */
/*   max_len  number of elements the buffer can hold */
/* and the helpers: */
/*   List_T_construct(data_ptr, len, max_len)  wraps the given values, nothing is allocated */
/*   List_T_alloc(len)                         creates an EMPTY list with capacity for len elements */
/*   List_T_push(ptr, value)                   appends value (defined by List_define(T)) */
#define List_declare(T)\
typedef struct List_##T {\
    T* data;\
    u32 len;\
    u32 max_len;\
} List_##T;\
\
static inline List_##T List_##T##_construct(T* data_ptr, u32 len, u32 max_len) {\
    return (List_##T){ .data = data_ptr, .len = len, .max_len = max_len };\
}\
\
static inline List_##T List_##T##_alloc(u32 len){\
    T* data_ptr = len > 0 ? malloc(len * sizeof(T)) : NULL;\
    /* record the real capacity, otherwise the first push would */\
    /* treat the list as full and shrink the buffer */\
    return List_##T##_construct(data_ptr, 0, data_ptr != NULL ? len : 0);\
}\
\
void List_##T##_push(List_##T* ptr, T value);
/* List_define(T) emits the definition of List_T_push (use it in exactly one .c file). */
/* List_T_push appends value to the list. When the list is full, the capacity grows to */
/* 1.5x the old one plus __List_padding_in_sizeof_T(T), but never below __List_min_size. */
/* The function returns void, so an allocation failure cannot be reported: it aborts. */
#define List_define(T)\
void List_##T##_push(List_##T* ptr, T value){\
    if(ptr->len == ptr->max_len){\
        /* integer form of max_len * 1.5 (same value as the truncated float product) */\
        u32 max_len = ptr->max_len + ptr->max_len / 2;\
        max_len += __List_padding_in_sizeof_T(T);\
        if(max_len < __List_min_size)\
            max_len = __List_min_size;\
        T* new_data = realloc(ptr->data, max_len * sizeof(T));\
        /* fail loudly instead of writing through a NULL pointer */\
        if(new_data == NULL)\
            abort();\
        ptr->data = new_data;\
        ptr->max_len = max_len;\
    }\
    ptr->data[ptr->len++] = value;\
}
// Smallest capacity (in elements) that List_T_push grows a list to.
#define __List_min_size 16
// Number of extra elements List_T_push adds on top of the 1.5x growth.
// The value depends only on sizeof(T):
// sizeof(T) == 1 - 7 extra elements
// sizeof(T) == 2 - 3 extra elements
// sizeof(T) == 4 - 1 extra element
// sizeof(T) == 8 - 1 extra element, sizeof(T) == 16 - 0 extra elements
// NOTE(review): presumably meant to pad the buffer relative to an 8-byte boundary - confirm intent
#define __List_padding_in_sizeof_T(T) ((8 - sizeof(T) % 8) / sizeof(T) )

169
src/compiler/compiler.c Normal file
View File

@ -0,0 +1,169 @@
#include "compiler.h"
List_define(Token);
/// @brief Puts the compiler into CompilerState_Error and stores a heap-allocated message
///        of the form "[at LINE:COLUMN][CONTEXT] MESSAGE" in cmp->error_message.
/// @param cmp compiler whose line, column, state and error_message are used/updated
/// @param context text placed inside the second pair of brackets
/// @param format printf-style format of MESSAGE, followed by its arguments
/// If the message cannot be built, error_message is set to "SPRINTF FAILED"
/// (or NULL if even that allocation fails). A previously stored message is freed.
void _Compiler_setError(Compiler* cmp, const char* context, const char* format, ...){
    // "[at " + 10 digits + ":" + 10 digits + "][" + '\0' is at most 29 bytes
    char position_str[32];
    snprintf(position_str, sizeof(position_str), "[at %u:%u][", cmp->line, cmp->column);
    char* real_format = strcat_malloc(position_str, context, "] ", format);

    char* NULLABLE(buf) = NULL;
    if(real_format != NULL){
        va_list argv;
        va_start(argv, format);
        buf = vsprintf_malloc(512, real_format, argv);
        va_end(argv);
        free(real_format);
    }

    // the fallback must depend on the NEW message, not on the old cmp->error_message
    if(buf == NULL){
        buf = malloc(16);
        if(buf != NULL)
            strcpy(buf, "SPRINTF FAILED");
    }

    // release the message of an earlier error before replacing it
    free(cmp->error_message);
    cmp->error_message = buf;
    cmp->state = CompilerState_Error;
}
// Error helpers. All of them expect a variable `Compiler* cmp` in the calling scope
// and must be followed by a semicolon at the call site.

// records an error on cmp (see Compiler_setError)
#define setError(FORMAT, ...) Compiler_setError(cmp, FORMAT, ##__VA_ARGS__)
// records an error and returns false from the calling function
#define returnError(FORMAT, ...) do {\
    setError(FORMAT, ##__VA_ARGS__);\
    return false;\
} while(0)
// returnError() when STATEMENT is true; wrapped in do/while so it is safe before `else`
#define returnErrorIf(STATEMENT, FORMAT, ...) do {\
    if(STATEMENT)\
        returnError(FORMAT, ##__VA_ARGS__);\
} while(0)
// uses the source text of STATEMENT as the message; passed through "%s"
// so that a '%' inside the expression is not interpreted as a format directive
#define returnErrorIf_auto(STATEMENT) returnErrorIf(STATEMENT, "%s", #STATEMENT)
// format + argument pairs to be passed to setError/returnError
#define Error_UnexpectedCharacter(C) "unexpected character '%c'", C
#define Error_endOfFile() "unexpected end of file"
/// @brief Zeroes the whole Compiler struct, then sets the state to
///        CompilerState_Initial and creates the token list via List_Token_alloc.
void Compiler_init(Compiler* cmp){
    // argument passed to List_Token_alloc
    const u32 token_list_size = 8 * 1024;

    memset(cmp, 0, sizeof(*cmp));
    cmp->tokens = List_Token_alloc(token_list_size);
    cmp->state = CompilerState_Initial;
}
/// @brief Releases the heap memory owned by the compiler: the token list buffer
///        and the error message. Pointers are reset, so calling it twice is harmless.
void Compiler_free(Compiler* cmp){
    // free(NULL) is a no-op, no guards needed
    free(cmp->tokens.data);
    cmp->tokens = List_Token_construct(NULL, 0, 0);

    // error_message is heap-allocated by _Compiler_setError
    free(cmp->error_message);
    cmp->error_message = NULL;
}
/// @brief Looks at the first character of a lexema and selects the compiler state
///        that will read the rest of it. Records an error for any other character.
/// @param pos index of c in cmp->code (not used here)
/// @param c character to classify
static void _recognizeLexema(Compiler* cmp, size_t pos, char c){
    // blank characters are skipped, state stays the same
    if(c == ' ' || c == '\t' || c == '\r' || c == '\n')
        return;

    // possible beginning of a comment
    if(c == '/'){
        cmp->state = CompilerState_ReadingComment;
        return;
    }

    // possible beginning of a label
    if(c == '.'){
        cmp->state = CompilerState_ReadingLabel;
        return;
    }

    // possible beginning of an instruction
    if(isAlphabeticalL(c) || isAlphabeticalR(c)){
        cmp->state = CompilerState_ReadingInstruction;
        return;
    }

    setError(Error_UnexpectedCharacter(c));
}
/// @brief Consumes one character while the compiler is in CompilerState_ReadingComment.
/// @param pos index of c in cmp->code
/// @param c current character
/// @param tok token being built; TokenType_Unset means the comment kind is not known yet
///
/// The first call (tok->type == TokenType_Unset) receives the character that follows
/// the opening '/': '*' starts a multi-line comment, '/' a single-line one, anything
/// else is an error. tok->begin is the index of that second delimiter character and
/// tok->length counts only the comment content (without delimiters and line ending).
///
/// When the comment ends, the token is pushed to cmp->tokens, tok is reset to
/// TokenType_Unset and the compiler returns to CompilerState_LexemaRecognition.
/// Reaching the end of file inside a multi-line comment is an error.
static void _readCommentChar(Compiler* cmp, size_t pos, char c, Token* tok){
    const bool at_end_of_file = (pos == cmp->code_len - 1);
    bool comment_ended = false;

    if(tok->type == TokenType_Unset){
        // begin comment
        if(c == '*')
            tok->type = TokenType_MultiLineComment;
        else if(c == '/')
            tok->type = TokenType_SingleLineComment;
        else {
            setError(Error_UnexpectedCharacter(c));
            return;
        }
        tok->begin = pos;
        tok->length = 0;
    }
    else if(tok->type == TokenType_MultiLineComment){
        // end multi-line comment on "*/"; the '*' must not be the opening one (rejects "/*/")
        if(c == '/' && cmp->code[pos - 1] == '*' && pos - 1 > tok->begin){
            tok->length = pos - tok->begin - 2;
            comment_ended = true;
        }
    }
    else if(tok->type == TokenType_SingleLineComment && c == '\n'){
        // end single-line comment
        tok->length = pos - tok->begin - 1;
        // remove trailing '\r'
        if(cmp->code[pos - 1] == '\r')
            tok->length--;
        comment_ended = true;
    }

    // the last character of the file closes a single-line comment,
    // but an unclosed multi-line comment is an error
    if(!comment_ended && at_end_of_file){
        if(tok->type != TokenType_SingleLineComment){
            setError(Error_endOfFile());
            return;
        }
        tok->length = pos - tok->begin;
        comment_ended = true;
    }

    if(comment_ended){
        List_Token_push(&cmp->tokens, *tok);
        // make the lexer ready for the next lexema
        tok->type = TokenType_Unset;
        cmp->state = CompilerState_LexemaRecognition;
    }
    // otherwise c is comment content: nothing to do
}
/// @brief Walks over cmp->code character by character and dispatches each one
///        to the handler of the current compiler state, tracking line and column.
/// @return false if an error was recorded (see cmp->error_message), true otherwise
bool _Compiler_lex(Compiler* cmp){
    // Compiler_init leaves the state at CompilerState_Initial, which has no handler
    // in the switch below; lexing always starts with lexema recognition
    cmp->state = CompilerState_LexemaRecognition;
    cmp->line = 1;
    cmp->column = 1;
    Token tok = Token_construct(TokenType_Unset, 0, 0);

    for(size_t pos = 0; pos < cmp->code_len; pos++){
        char c = cmp->code[pos];
        switch(cmp->state){
            // error recorded while handling the previous character
            case CompilerState_Error:
                return false;
            case CompilerState_LexemaRecognition:
                _recognizeLexema(cmp, pos, c);
                break;
            case CompilerState_ReadingComment:
                _readCommentChar(cmp, pos, c, &tok);
                break;
            default:
                returnError("Unexpected compiler state %i", cmp->state);
        }

        // column is reset to 0 and then incremented, so the next line starts at column 1
        if(c == '\n'){
            cmp->column = 0;
            cmp->line++;
        }
        cmp->column++;
    }

    // catches an error recorded on the very last character
    return cmp->state != CompilerState_Error;
}
/// @brief Placeholder: does no work yet and always reports success.
bool _Compiler_parse(Compiler* cmp){
    (void)cmp; // unused for now
    return true;
}
/// @brief Placeholder: does no work yet and always reports success.
bool _Compiler_compile(Compiler* cmp){
    (void)cmp; // unused for now
    return true;
}
/// @brief Validates the arguments, stores them in cmp and runs the three stages
///        (lex, parse, compile) in order, stopping at the first one that fails.
/// @return true if every stage succeeded, false otherwise
bool Compiler_compileTasm(Compiler* cmp, const char* restrict code, size_t code_len, char* restrict out_buffer, size_t out_len){
    // each check records an error with the checked expression as message and returns false
    returnErrorIf_auto(code == NULL);
    returnErrorIf_auto(code_len == 0);
    returnErrorIf_auto(out_buffer == NULL);
    returnErrorIf_auto(out_len == 0);

    // input
    cmp->code_len = code_len;
    cmp->code = code;
    // output
    cmp->out_len = out_len;
    cmp->out_buffer = out_buffer;

    // && short-circuits: a later stage runs only if the previous one returned true
    return _Compiler_lex(cmp)
        && _Compiler_parse(cmp)
        && _Compiler_compile(cmp);
}

41
src/compiler/compiler.h Normal file
View File

@ -0,0 +1,41 @@
#pragma once
#include "../std.h"
#include "../collections/List.h"
#include "token.h"
List_declare(Token);
/// State of the compiler; during lexing it selects the handler for the next character.
typedef enum CompilerState {
// set by Compiler_init
CompilerState_Initial,
// next character is classified by the lexer to choose one of the Reading* states
CompilerState_LexemaRecognition,
// entered when '/' is met
CompilerState_ReadingComment,
// entered when '.' is met
CompilerState_ReadingLabel,
// entered when an alphabetical character is met
CompilerState_ReadingInstruction,
// NOTE(review): not used by the lexer code visible so far
CompilerState_ReadingArguments,
// NOTE(review): not used by the lexer code visible so far
CompilerState_ReadingData,
// set by _Compiler_setError
CompilerState_Error,
CompilerState_Success
} CompilerState;
/// Compiler context: the input/output buffers given to Compiler_compileTasm,
/// the current position in the code, the error status and the collected tokens.
typedef struct Compiler {
// source code and its length, as passed to Compiler_compileTasm
const char* code;
size_t code_len;
// output buffer and its size, as passed to Compiler_compileTasm
char* out_buffer;
size_t out_len;
u32 line; // > 0 if code parsing started
u32 column; // > 0 if code parsing started
// heap-allocated message stored by _Compiler_setError
NULLABLE(char* error_message);
CompilerState state;
// tokens pushed by the lexer
List_Token tokens;
} Compiler;
/// @brief zero the struct, set the initial state and create the token list
void Compiler_init(Compiler* cmp);
/// @brief release the token list buffer owned by cmp
void Compiler_free(Compiler* cmp);
/// @brief compile assembly language code to machine code
/// @return true if no errors, false if any error occurred (check cmp->error_message)
bool Compiler_compileTasm(Compiler* cmp, const char* restrict code, size_t code_len, char* restrict out_buffer, size_t out_len);
/// @brief record an error on cmp; the name of the calling function (__func__) is used as context
#define Compiler_setError(cmp, format, ...) _Compiler_setError(cmp, __func__, format ,##__VA_ARGS__)
/// @brief set cmp->state to CompilerState_Error and store a formatted message in cmp->error_message
/// @param context text put in brackets before the message
/// @param format printf-style format, checked by the compiler through the format attribute
void _Compiler_setError(Compiler* cmp, const char* context, const char* format, ...) __attribute__((__format__(__printf__, 3, 4)));

21
src/compiler/token.h Normal file
View File

@ -0,0 +1,21 @@
#pragma once
#include "../std.h"
/// Kind of a Token. Stored in a 4-bit bit-field of Token.
typedef enum TokenType {
// the kind is not determined yet
TokenType_Unset,
TokenType_SingleLineComment,
TokenType_MultiLineComment,
TokenType_Label,
TokenType_Instruction,
TokenType_Argument,
TokenType_Data,
/* there is a place for 2 values left (TokenType must occupy 4 bits) */
} TokenType;
/// A span of the source code together with its kind.
/// length and type are bit-fields of 28 and 4 bits.
/// NOTE(review): presumably the two bit-fields are meant to share one 32-bit word;
/// bit-field layout is implementation-defined - confirm sizeof(Token) on target compilers
typedef struct Token {
u32 begin; // some index in Compiler->code
u32 length : 28; // length in characters (28 bits)
TokenType type : 4; // type of token (4 bits)
} Token;
/// Builds a Token value. The third argument is stored in Token.length,
/// so it is the LENGTH of the token, not its end index.
#define Token_construct(TYPE, BEGIN, LENGTH) ((Token){ .type = (TYPE), .begin = (BEGIN), .length = (LENGTH) })

52
src/cstr.c Normal file
View File

@ -0,0 +1,52 @@
#include "std.h"
/// @brief Variadic front-end of _vstrcat_malloc: joins str0 and the n strings that follow it.
/// @param n number of strings passed after str0
/// @return whatever _vstrcat_malloc returns for the same arguments
char* _strcat_malloc(size_t n, cstr str0, ...){
    va_list parts;
    va_start(parts, str0);
    char* const joined = _vstrcat_malloc(n, str0, parts);
    va_end(parts);
    return joined;
}
/// @brief Concatenates str0 and n more strings taken from argv into a new heap buffer.
/// @param n number of cstr arguments stored in argv
/// @param str0 first part of the result
/// @param argv argument list holding n cstr values; n values are consumed from it
/// @return null-terminated string that the caller must free(), or NULL if allocation fails
char* _vstrcat_malloc(size_t n, cstr str0, va_list argv){
    const size_t str0_len = strlen(str0);
    size_t total_len = str0_len;

    // first pass over a copy of the argument list: measure the result,
    // so no temporary arrays of parts and lengths are needed
    va_list measure_argv;
    va_copy(measure_argv, argv);
    for(size_t i = 0; i < n; i++){
        total_len += strlen(va_arg(measure_argv, cstr));
    }
    va_end(measure_argv);

    char* const buf = malloc(total_len + 1);
    if(buf == NULL)
        return NULL;

    // second pass: copy the parts one after another
    memcpy(buf, str0, str0_len);
    char* walking_ptr = buf + str0_len;
    for(size_t i = 0; i < n; i++){
        cstr part = va_arg(argv, cstr);
        const size_t part_len = strlen(part);
        memcpy(walking_ptr, part, part_len);
        walking_ptr += part_len;
    }
    *walking_ptr = '\0';
    return buf;
}
/// @brief Variadic front-end of vsprintf_malloc.
/// @param buffer_size size of the buffer requested from vsprintf_malloc
/// @param format printf-style format followed by its arguments
/// @return whatever vsprintf_malloc returns for the same arguments (may be NULL)
char* NULLABLE(sprintf_malloc)(size_t buffer_size, cstr format, ...){
    va_list args;
    va_start(args, format);
    char* NULLABLE(formatted) = vsprintf_malloc(buffer_size, format, args);
    va_end(args);
    return formatted;
}
/// @brief Formats a string into a newly allocated buffer of buffer_size bytes.
/// @param buffer_size size of the buffer in bytes, including the terminating '\0'
/// @param format printf-style format
/// @param argv arguments for format
/// @return heap string that the caller must free(), or NULL if buffer_size is 0,
///         the allocation fails or vsprintf_s reports an error
char* NULLABLE(vsprintf_malloc)(size_t buffer_size, cstr format, va_list argv){
    // a zero-sized buffer cannot hold even the terminator
    if(buffer_size == 0)
        return NULL;

    char* buf = malloc(buffer_size);
    if(buf == NULL)
        return NULL;

    if(vsprintf_s(buf, buffer_size, format, argv) < 0){
        free(buf);
        return NULL;
    }
    return buf;
}

View File

@ -1,17 +1,34 @@
#include "VM/VM.h" #include "VM/VM.h"
#include "instructions/instructions.h" #include "instructions/instructions.h"
#include "collections/List.h"
List_declare(cstr);
List_define(cstr);
#define arg_is(STR) (strcmp(argv[argi], STR) == 0) #define arg_is(STR) (strcmp(argv[argi], STR) == 0)
i32 main(const i32 argc, const char** argv){ i32 compileSources(cstr out_file, List_cstr* sources);
const char* NULLABLE(filename) = NULL; i32 bootFromImage(cstr image_file);
i32 main(const i32 argc, cstr* argv){
if(argc < 2){
printfe("ERROR: no arguments provided. Use --help to know more.\n");
return 1;
}
bool boot = false;
cstr NULLABLE(image_file) = NULL;
bool compile = false;
cstr NULLABLE(out_file) = NULL;
List_cstr NULLABLE(sources) = List_cstr_construct(NULL, 0, 0);
for(i32 argi = 1; argi < argc; argi++){ for(i32 argi = 1; argi < argc; argi++){
if(arg_is("-h") || arg_is("--help")){ if(arg_is("-h") || arg_is("--help")){
printf( printf(
"-h, --help Show this message\n" "-h, --help Show this message.\n"
"-op, --opcodes Shows list of all instructions.\n" "-op, --opcodes Show list of all instructions.\n"
"-i, --image [FILE] Boot VM using image file\n" "-i, --image [FILE] Boot VM using image file.\n"
"-c, --compile [OUT_FILE] [SOURCES...] Compile assembly source files to machine code.\n"
); );
return 0; return 0;
} }
@ -25,11 +42,32 @@ i32 main(const i32 argc, const char** argv){
return 0; return 0;
} }
else if(arg_is("-i") || arg_is("--image")){ else if(arg_is("-i") || arg_is("--image")){
if(boot){
printfe("--image flag is set already\n");
return 1;
}
if(++argi >= argc){ if(++argi >= argc){
printfe("ERROR: no image file specified\n"); printfe("ERROR: no image file specified\n");
return 1; return 1;
} }
filename = argv[argi]; image_file = argv[argi];
boot = true;
}
else if(arg_is("-c") || arg_is("--compile")){
if(compile){
printfe("--compile flag is set already\n");
return 1;
}
if(++argi >= argc){
printfe("ERROR: no output file file specified\n");
return 1;
}
out_file = argv[argi];
sources = List_cstr_alloc(argc);
compile = true;
}
else if(compile){
List_cstr_push(&sources, argv[argi]);
} }
else { else {
printfe("ERROR: unknown argument '%s'\n", argv[argi]); printfe("ERROR: unknown argument '%s'\n", argv[argi]);
@ -37,14 +75,21 @@ i32 main(const i32 argc, const char** argv){
} }
} }
if(filename == NULL){ i32 exit_code = 0;
printfe("ERROR: no arguments provided. Use --help to know more.\n"); if(compile){
return 1; exit_code = compileSources(out_file, &sources);
}
if(exit_code == 0 && boot){
exit_code = bootFromImage(image_file);
} }
FILE* file = fopen(filename, "rb"); return exit_code;
}
i32 bootFromImage(cstr image_file){
FILE* file = fopen(image_file, "rb");
if(file == NULL){ if(file == NULL){
printfe("ERROR: can't open file '%s'\n", filename); printfe("ERROR: can't open file '%s'\n", image_file);
return 1; return 1;
} }
@ -55,7 +100,7 @@ i32 main(const i32 argc, const char** argv){
size_t bytes_read = fread(vm_memory, 1, buffer_size, file); size_t bytes_read = fread(vm_memory, 1, buffer_size, file);
fclose(file); fclose(file);
if(bytes_read == (size_t)EOF){ if(bytes_read == (size_t)EOF){
printfe("ERROR: can't read file '%s'\n", filename); printfe("ERROR: can't read file '%s'\n", image_file);
free(vm_memory); free(vm_memory);
return 1; return 1;
} }
@ -83,54 +128,6 @@ i32 main(const i32 argc, const char** argv){
return exit_code; return exit_code;
} }
i32 compileSources(cstr out_file, List_cstr* sources){
char* _strcat_malloc(size_t n, const char* str0, ...){ return -1;
va_list argv;
va_start(argv, str0);
char* heap_ptr = _vstrcat_malloc(n, str0, argv);
va_end(argv);
return heap_ptr;
}
char* _vstrcat_malloc(size_t n, const char* str0, va_list argv){
size_t str0_len = strlen(str0);
size_t total_len = str0_len;
const char** const parts = malloc(sizeof(const char*) * n);
size_t* const part_lengths = malloc(sizeof(size_t) * n);
for(size_t i = 0; i < n; i++){
const char* part = va_arg(argv, const char*);
size_t length = strlen(part);
parts[i] = part;
part_lengths[i] = length;
total_len += length;
}
char* const buf = malloc(total_len + 1);
memcpy(buf, str0, str0_len);
char* walking_ptr = buf + str0_len;
for(size_t i = 0; i < n; i++){
memcpy(walking_ptr, parts[i], part_lengths[i]);
walking_ptr += part_lengths[i];
}
buf[total_len] = '\0';
free(parts);
free(part_lengths);
return buf;
}
char* NULLABLE(sprintf_malloc)(size_t buffer_size, const char* format, ...){
va_list argv;
va_start(argv, format);
char* NULLABLE(heap_ptr) = vsprintf_malloc(buffer_size, format, argv);
va_end(argv);
return heap_ptr;
}
char* NULLABLE(vsprintf_malloc)(size_t buffer_size, const char* format, va_list argv){
char* buf = malloc(buffer_size);
int r = vsprintf_s(buf, buffer_size, format, argv);
if(r < 0){
free(buf);
return NULL;
}
return buf;
} }

View File

@ -24,6 +24,8 @@ typedef u8 bool;
#define true 1 #define true 1
#define false 0 #define false 0
typedef const char* cstr;
#define __count_args( \ #define __count_args( \
a0, a1, a2, a3, a4, a5, a6, a7 , a8, a9, a10,a11,a12,a13,a14,a15, \ a0, a1, a2, a3, a4, a5, a6, a7 , a8, a9, a10,a11,a12,a13,a14,a15, \
a16,a17,a18,a19,a20,a21,a22,a23, a24,a25,a26,a27,a28,a29,a30,a31, \ a16,a17,a18,a19,a20,a21,a22,a23, a24,a25,a26,a27,a28,a29,a30,a31, \
@ -49,3 +51,7 @@ char* _vstrcat_malloc(size_t n, const char* str0, va_list argv);
char* NULLABLE(sprintf_malloc)(size_t buffer_size, const char* format, ...) __attribute__((__format__(__printf__, 2, 3))); char* NULLABLE(sprintf_malloc)(size_t buffer_size, const char* format, ...) __attribute__((__format__(__printf__, 2, 3)));
char* NULLABLE(vsprintf_malloc)(size_t buffer_size, const char* format, va_list argv); char* NULLABLE(vsprintf_malloc)(size_t buffer_size, const char* format, va_list argv);
/// @return true only if c is in the range 'a'..'z'
static inline bool isAlphabeticalL(char c) { return c >= 'a' && c <= 'z'; }
/// @return true only if c is in the range 'A'..'Z'
static inline bool isAlphabeticalR(char c) { return c >= 'A' && c <= 'Z'; }
/// @return true only if c is in the range '0'..'9'
static inline bool isDigit(char c) { return c >= '0' && c <= '9'; }