List and Compiler
This commit is contained in:
parent
a073d0ebd9
commit
1cae800a66
20
src/collections/Array.h
Normal file
20
src/collections/Array.h
Normal file
@ -0,0 +1,20 @@
|
||||
#pragma once
|
||||
#include "../std.h"
|
||||
|
||||
/*
 * Array_declare(T) generates a fixed-length array type for element type T:
 *   Array_T               - struct holding a data pointer and an element count
 *   Array_T_construct()   - wraps an existing pointer and length, no allocation
 *   Array_T_alloc()       - malloc()s room for `len` elements (contents uninitialized)
 *   Array_T_realloc()     - realloc()s the buffer to `new_len` elements
 * Allocation results are stored without being checked.
 */
#define Array_declare(T)\
    typedef struct Array_##T {\
        T* data;\
        u32 len;\
    } Array_##T;\
    \
    static inline Array_##T Array_##T##_construct(T* data_ptr, u32 len) {\
        Array_##T view;\
        view.data = data_ptr;\
        view.len = len;\
        return view;\
    }\
    \
    static inline Array_##T Array_##T##_alloc(u32 len){\
        T* fresh = malloc(len * sizeof(T));\
        return Array_##T##_construct(fresh, len);\
    }\
    \
    static inline void Array_##T##_realloc(Array_##T* ptr, u32 new_len){\
        T* resized = realloc(ptr->data, new_len * sizeof(T));\
        ptr->data = resized;\
        ptr->len = new_len;\
    }
|
||||
41
src/collections/List.h
Normal file
41
src/collections/List.h
Normal file
@ -0,0 +1,41 @@
|
||||
#pragma once
|
||||
#include "../std.h"
|
||||
|
||||
/*
 * List_declare(T) generates a growable list type for element type T:
 *   List_T              - struct with data pointer, element count (len) and capacity (max_len)
 *   List_T_construct()  - wraps the given pointer / count / capacity, no allocation
 *   List_T_alloc()      - returns an empty list with room reserved for `len` elements
 *   List_T_push()       - prototype only; the definition comes from List_define(T)
 *
 * List_T_alloc() records the reserved capacity in max_len. If it were left at 0,
 * the first push would see len == max_len and reallocate straight away,
 * discarding the reservation. When malloc() fails (or len is 0) the list starts
 * with no buffer and capacity 0, so the first push allocates.
 */
#define List_declare(T)\
    typedef struct List_##T {\
        T* data;\
        u32 len;\
        u32 max_len;\
    } List_##T;\
    \
    static inline List_##T List_##T##_construct(T* data_ptr, u32 len, u32 max_len) {\
        return (List_##T){ .data = data_ptr, .len = len, .max_len = max_len };\
    }\
    \
    static inline List_##T List_##T##_alloc(u32 len){\
        T* data_ptr = len > 0 ? malloc(len * sizeof(T)) : NULL;\
        /* only advertise capacity that was really obtained */\
        return List_##T##_construct(data_ptr, 0, data_ptr != NULL ? len : 0);\
    }\
    \
    void List_##T##_push(List_##T* ptr, T value);
|
||||
|
||||
|
||||
/*
 * List_define(T) emits the definition of List_T_push() declared by List_declare(T).
 *
 * List_T_push() appends `value`; when the list is full (len == max_len) the buffer
 * is first grown to 1.5x the old capacity plus __List_padding_in_sizeof_T(T)
 * elements, and never to less than __List_min_size elements.
 *
 * The function returns void and so cannot report failure: if the new capacity
 * wraps around or realloc() fails, the process is aborted rather than writing
 * through a NULL or undersized buffer. The old buffer is never overwritten by a
 * failed realloc().
 */
#define List_define(T)\
    void List_##T##_push(List_##T* ptr, T value){\
        if(ptr->len == ptr->max_len){\
            /* integer form of max_len * 1.5 (identical result, no float round-trip) */\
            u32 max_len = ptr->max_len + ptr->max_len / 2;\
            max_len += __List_padding_in_sizeof_T(T);\
            if(max_len < __List_min_size)\
                max_len = __List_min_size;\
            /* u32 arithmetic wrapped around: the list cannot grow any further */\
            if(max_len <= ptr->len)\
                abort();\
            T* new_data = realloc(ptr->data, max_len * sizeof(T));\
            if(new_data == NULL)\
                abort();\
            ptr->data = new_data;\
            ptr->max_len = max_len;\
        }\
        ptr->data[ptr->len++] = value;\
    }
|
||||
|
||||
/* Lower bound, in elements, for the capacity chosen when a list grows. */
#define __List_min_size 16

/*
 * Extra elements added to the capacity on every growth, derived from how many
 * T fit into (8 - sizeof(T) % 8) bytes:
 *   sizeof(T) == 1  ->  7 elements
 *   sizeof(T) == 2  ->  3 elements
 *   sizeof(T) == 4  ->  1 element
 * NOTE(review): sizeof(T) == 8 also yields 1 (8 / 8), while larger multiples
 * of 8 yield 0 - confirm whether 0 was intended for sizeof(T) == 8.
 */
#define __List_padding_in_sizeof_T(T) ((8 - (sizeof(T) % 8)) / sizeof(T))
|
||||
169
src/compiler/compiler.c
Normal file
169
src/compiler/compiler.c
Normal file
@ -0,0 +1,169 @@
|
||||
#include "compiler.h"
|
||||
|
||||
// Emit the out-of-line List_Token_push() definition for the list type declared in compiler.h.
List_define(Token);
|
||||
|
||||
/**
 * Store a formatted error message on the compiler and switch it to
 * CompilerState_Error.
 *
 * The stored message is "[at LINE:COLUMN][CONTEXT] " followed by `format`
 * expanded with the variadic arguments. `context` is spliced into the format
 * string itself, so it must not contain '%' (the Compiler_setError macro
 * passes __func__).
 *
 * If the message cannot be built, the fixed text "SPRINTF FAILED" is stored
 * instead; cmp->error_message ends up NULL only when that allocation fails too.
 *
 * NOTE(review): a message already stored in cmp->error_message is overwritten
 * without being freed - nothing visible here says who owns it; confirm.
 */
void _Compiler_setError(Compiler* cmp, const char* context, const char* format, ...){
    // "[at " + up to 10 digits + ':' + up to 10 digits + "][" + NUL fits into 32 bytes
    char position_str[32];
    snprintf(position_str, sizeof(position_str), "[at %u:%u][", cmp->line, cmp->column);
    char* real_format = strcat_malloc(position_str, context, "] ", format);

    char* NULLABLE(buf) = NULL;
    if(real_format != NULL){
        va_list argv;
        va_start(argv, format);
        buf = vsprintf_malloc(512, real_format, argv);
        va_end(argv);
        free(real_format);
    }

    // the fallback must depend on the freshly formatted buffer, not on the
    // previously stored message, and must not be overwritten afterwards
    if(buf == NULL){
        static const char fallback[] = "SPRINTF FAILED";
        buf = malloc(sizeof(fallback));
        if(buf != NULL)
            memcpy(buf, fallback, sizeof(fallback));
    }

    cmp->state = CompilerState_Error;
    cmp->error_message = buf;
}
|
||||
|
||||
|
||||
/*
 * Error-reporting helpers for functions that have a `Compiler* cmp` in scope.
 * All of them are single statements: write them with a trailing ';' and they
 * are safe inside unbraced if/else.
 */

/* Report a formatted error on `cmp`. */
#define setError(FORMAT, ...) Compiler_setError(cmp, FORMAT, ##__VA_ARGS__)

/* Report a formatted error and make the enclosing function return false. */
#define returnError(FORMAT, ...) do {\
    setError(FORMAT, ##__VA_ARGS__);\
    return false;\
} while(0)

/* returnError(...) when STATEMENT evaluates to true. */
#define returnErrorIf(STATEMENT, FORMAT, ...) do {\
    if(STATEMENT)\
        returnError(FORMAT, ##__VA_ARGS__);\
} while(0)

/*
 * Like returnErrorIf, using the source text of STATEMENT as the message.
 * The text goes through "%s" so that a '%' inside the expression is never
 * interpreted as a printf conversion.
 */
#define returnErrorIf_auto(STATEMENT) returnErrorIf(STATEMENT, "%s", #STATEMENT)

/* Format + argument pair for an unexpected input character. */
#define Error_UnexpectedCharacter(C) "unexpected character '%c'", (C)

/* Message for input that ends in the middle of a construct. */
#define Error_endOfFile() "unexpected end of file"
|
||||
|
||||
/* Reset *cmp to all-zero bytes, reserve the token list and enter the initial state. */
void Compiler_init(Compiler* cmp){
    memset(cmp, 0, sizeof(*cmp));
    cmp->tokens = List_Token_alloc(8 * 1024);
    cmp->state = CompilerState_Initial;
}
|
||||
|
||||
/*
 * Release the token buffer owned by *cmp and leave an empty list behind so a
 * second call (or a later push) cannot touch freed memory.
 * NOTE(review): cmp->error_message is not released here - confirm whether the
 * caller is expected to free it.
 */
void Compiler_free(Compiler* cmp){
    free(cmp->tokens.data); // free(NULL) is a no-op, no guard needed
    cmp->tokens = List_Token_construct(NULL, 0, 0);
}
|
||||
|
||||
/*
 * Look at one character between lexemes and pick the next lexer state:
 *   blank -> state unchanged, '/' -> ReadingComment, '.' -> ReadingLabel,
 *   isAlphabeticalL/R -> ReadingInstruction, anything else -> error.
 * `pos` is not used.
 */
static void _recognizeLexema(Compiler* cmp, size_t pos, char c){
    // blank characters are skipped
    if(c == ' ' || c == '\t' || c == '\r' || c == '\n')
        return;

    // possible start of a comment
    if(c == '/'){
        cmp->state = CompilerState_ReadingComment;
        return;
    }

    // possible start of a label
    if(c == '.'){
        cmp->state = CompilerState_ReadingLabel;
        return;
    }

    // possible start of an instruction
    if(isAlphabeticalL(c) || isAlphabeticalR(c)){
        cmp->state = CompilerState_ReadingInstruction;
        return;
    }

    setError(Error_UnexpectedCharacter(c));
}
|
||||
|
||||
/* Store the finished comment token, clear it and go back to lexeme recognition. */
static void _finishComment(Compiler* cmp, Token* tok, size_t length){
    tok->length = length;
    List_Token_push(&cmp->tokens, *tok);
    tok->type = TokenType_Unset;
    cmp->state = CompilerState_LexemaRecognition;
}

/*
 * Consume one character while the lexer is in CompilerState_ReadingComment.
 *
 * The first call after the opening '/' sees the second opener character and
 * decides between a single-line comment ("//") and a multi-line one; tok->begin
 * is the index of that second character, so the comment text starts at
 * tok->begin + 1. tok->length counts only the text (no delimiters, no line
 * break).
 *
 * A comment ends at '\n' (single-line, a trailing '\r' is dropped), at a
 * closing star-slash pair (multi-line) or at the last character of the input
 * (single-line only; an unterminated multi-line comment is an error). On every
 * successful end the token is pushed, *tok is reset to TokenType_Unset and the
 * lexer returns to CompilerState_LexemaRecognition.
 */
static void _readCommentChar(Compiler* cmp, size_t pos, char c, Token* tok){
    bool at_end_of_file = (pos == cmp->code_len - 1);

    // begin comment
    if(tok->type == TokenType_Unset){
        if(c == '*')
            tok->type = TokenType_MultiLineComment;
        else if(c == '/')
            tok->type = TokenType_SingleLineComment;
        else {
            setError(Error_UnexpectedCharacter(c));
            return;
        }
        tok->begin = pos;
        tok->length = 0;
        // the opener is the very last thing in the file
        if(at_end_of_file){
            if(tok->type == TokenType_SingleLineComment){
                _finishComment(cmp, tok, 0);
            }
            else {
                setError(Error_endOfFile());
            }
        }
        return;
    }

    if(tok->type == TokenType_SingleLineComment){
        if(c == '\n'){
            size_t length = pos - tok->begin - 1;
            // remove trailing '\r'
            if(cmp->code[pos - 1] == '\r')
                length--;
            _finishComment(cmp, tok, length);
        }
        else if(at_end_of_file){
            // no line break after the comment: the last character belongs to it
            _finishComment(cmp, tok, pos - tok->begin);
        }
        return;
    }

    // multi-line comment: the closing '*' must come after the opening one,
    // so "/*/" is not treated as a complete comment
    if(c == '/' && cmp->code[pos - 1] == '*' && pos - 1 > tok->begin){
        _finishComment(cmp, tok, pos - tok->begin - 2);
    }
    else if(at_end_of_file){
        // error on unclosed multi-line comment
        setError(Error_endOfFile());
    }
    // otherwise: comment content, nothing to do
}
|
||||
|
||||
|
||||
|
||||
/*
 * Lexing stage: walk cmp->code one character at a time and dispatch on the
 * current lexer state, keeping cmp->line / cmp->column (both 1-based) up to date.
 *
 * A compiler fresh from Compiler_init() is in CompilerState_Initial, which the
 * dispatch below does not handle, so lexing starts by moving it to
 * CompilerState_LexemaRecognition. States without a handler here (labels,
 * instructions, ...) are reported as "Unexpected compiler state".
 *
 * @return false as soon as an error is recorded, true otherwise
 */
bool _Compiler_lex(Compiler* cmp){
    cmp->line = 1;
    cmp->column = 1;
    if(cmp->state == CompilerState_Initial)
        cmp->state = CompilerState_LexemaRecognition;

    Token tok = Token_construct(TokenType_Unset, 0, 0);
    for(size_t pos = 0; pos < cmp->code_len; pos++){
        char c = cmp->code[pos];

        switch(cmp->state){
            case CompilerState_Error:
                return false;
            case CompilerState_LexemaRecognition:
                _recognizeLexema(cmp, pos, c);
                break;
            case CompilerState_ReadingComment:
                _readCommentChar(cmp, pos, c, &tok);
                break;
            default:
                returnError("Unexpected compiler state %i", cmp->state);
        }

        // the position is advanced after the handlers ran, so errors they
        // report point at the character that caused them
        if(c == '\n'){
            cmp->column = 0;
            cmp->line++;
        }
        cmp->column++;
    }

    return cmp->state != CompilerState_Error;
}
|
||||
|
||||
/* Parsing stage. Not implemented yet: always reports success. */
bool _Compiler_parse(Compiler* cmp){
    (void)cmp;
    return true;
}
|
||||
/* Code generation stage. Not implemented yet: always reports success. */
bool _Compiler_compile(Compiler* cmp){
    (void)cmp;
    return true;
}
|
||||
|
||||
/*
 * Validate the arguments, remember the input and output buffers on *cmp and
 * run the lex, parse and compile stages in order, stopping at the first one
 * that fails. Returns true only if every stage succeeded.
 */
bool Compiler_compileTasm(Compiler* cmp, const char* restrict code, size_t code_len, char* restrict out_buffer, size_t out_len){
    // reject missing or empty buffers; the failed check becomes the error message
    returnErrorIf_auto(code == NULL);
    returnErrorIf_auto(code_len == 0);
    returnErrorIf_auto(out_buffer == NULL);
    returnErrorIf_auto(out_len == 0);

    cmp->code_len = code_len;
    cmp->code = code;
    cmp->out_len = out_len;
    cmp->out_buffer = out_buffer;

    // && short-circuits, so a failing stage prevents the later ones from running
    bool all_stages_ok = _Compiler_lex(cmp)
        && _Compiler_parse(cmp)
        && _Compiler_compile(cmp);
    return all_stages_ok;
}
|
||||
41
src/compiler/compiler.h
Normal file
41
src/compiler/compiler.h
Normal file
@ -0,0 +1,41 @@
|
||||
#pragma once
|
||||
#include "../std.h"
|
||||
#include "../collections/List.h"
|
||||
#include "token.h"
|
||||
|
||||
// Declare List_Token, its inline constructors and the List_Token_push() prototype.
List_declare(Token);
|
||||
|
||||
/* Current activity of a Compiler; drives the character dispatch in the lexer. */
typedef enum CompilerState {
    CompilerState_Initial            = 0, /* set by Compiler_init() */
    CompilerState_LexemaRecognition  = 1, /* between lexemes, deciding what starts next */
    CompilerState_ReadingComment     = 2,
    CompilerState_ReadingLabel       = 3,
    CompilerState_ReadingInstruction = 4,
    CompilerState_ReadingArguments   = 5,
    CompilerState_ReadingData        = 6,
    CompilerState_Error              = 7, /* set whenever an error message is recorded */
    CompilerState_Success            = 8
} CompilerState;
|
||||
|
||||
typedef struct Compiler {
|
||||
const char* code;
|
||||
size_t code_len;
|
||||
char* out_buffer;
|
||||
size_t out_len;
|
||||
|
||||
u32 line; // > 0 if code parsing started
|
||||
u32 column; // > 0 if code parsing started
|
||||
NULLABLE(char* error_message);
|
||||
CompilerState state;
|
||||
List_Token tokens;
|
||||
} Compiler;
|
||||
|
||||
void Compiler_init(Compiler* cmp);
|
||||
void Compiler_free(Compiler* cmp);
|
||||
|
||||
/// @brief compile assembly language code to machine code
|
||||
/// @return true if no errors, false if any error occurred (check cmp->error_message)
|
||||
bool Compiler_compileTasm(Compiler* cmp, const char* restrict code, size_t code_len, char* restrict out_buffer, size_t out_len);
|
||||
|
||||
// Variadic front end for _Compiler_setError that passes the calling function's name (__func__) as the error context.
#define Compiler_setError(cmp, format, ...) _Compiler_setError(cmp, __func__, format ,##__VA_ARGS__)
|
||||
void _Compiler_setError(Compiler* cmp, const char* context, const char* format, ...) __attribute__((__format__(__printf__, 3, 4)));
|
||||
21
src/compiler/token.h
Normal file
21
src/compiler/token.h
Normal file
@ -0,0 +1,21 @@
|
||||
#pragma once
|
||||
#include "../std.h"
|
||||
|
||||
/*
 * Kind of a lexed token. Token stores this value in a 4-bit bit-field, so every
 * enumerator has to fit into 4 bits.
 * NOTE(review): the original note said two more values fit; how many remain
 * depends on whether the bit-field is signed on the target compiler - confirm
 * before adding enumerators.
 */
typedef enum TokenType {
    TokenType_Unset             = 0,
    TokenType_SingleLineComment = 1,
    TokenType_MultiLineComment  = 2,
    TokenType_Label             = 3,
    TokenType_Instruction       = 4,
    TokenType_Argument          = 5,
    TokenType_Data              = 6,
} TokenType;
|
||||
|
||||
typedef struct Token {
|
||||
u32 begin; // some index in Compiler->code
|
||||
u32 length : 28; // length in characters (28 bits)
|
||||
TokenType type : 4; // type of token (4 bits)
|
||||
} Token;
|
||||
|
||||
/* Build a Token value; the third argument is the token LENGTH in characters, not an end index. */
#define Token_construct(TYPE, BEGIN, LENGTH) ((Token){ .type = (TYPE), .begin = (BEGIN), .length = (LENGTH) })
|
||||
52
src/cstr.c
Normal file
52
src/cstr.c
Normal file
@ -0,0 +1,52 @@
|
||||
#include "std.h"
|
||||
|
||||
/*
 * Variadic wrapper around _vstrcat_malloc(): concatenates str0 with the `n`
 * strings that follow it and returns the heap-allocated result.
 */
char* _strcat_malloc(size_t n, cstr str0, ...){
    va_list rest;
    va_start(rest, str0);
    char* joined = _vstrcat_malloc(n, str0, rest);
    va_end(rest);
    return joined;
}
|
||||
|
||||
/*
 * Concatenate str0 with the next `n` cstr arguments taken from argv.
 *
 * The arguments are walked twice - once on a va_copy to measure, once to copy -
 * so a single allocation of exactly the required size is enough.
 *
 * @param n     number of strings in argv (str0 is not counted)
 * @param str0  first part of the result, must not be NULL
 * @param argv  the remaining parts, each a non-NULL cstr
 * @return      newly allocated NUL-terminated string that the caller must
 *              free(), or NULL if the allocation failed
 */
char* _vstrcat_malloc(size_t n, cstr str0, va_list argv){
    size_t str0_len = strlen(str0);
    size_t total_len = str0_len;

    // measuring pass on a copy, so argv itself is still at the first part
    va_list measure;
    va_copy(measure, argv);
    for(size_t i = 0; i < n; i++){
        total_len += strlen(va_arg(measure, cstr));
    }
    va_end(measure);

    char* const buf = malloc(total_len + 1);
    if(buf == NULL)
        return NULL;

    memcpy(buf, str0, str0_len);
    char* walking_ptr = buf + str0_len;
    for(size_t i = 0; i < n; i++){
        cstr part = va_arg(argv, cstr);
        size_t length = strlen(part);
        memcpy(walking_ptr, part, length);
        walking_ptr += length;
    }
    *walking_ptr = '\0';
    return buf;
}
|
||||
|
||||
/*
 * Variadic wrapper around vsprintf_malloc(): formats into a freshly allocated
 * buffer of `buffer_size` bytes and returns it, or NULL when that call fails.
 */
char* NULLABLE(sprintf_malloc)(size_t buffer_size, cstr format, ...){
    va_list args;
    va_start(args, format);
    char* NULLABLE(formatted) = vsprintf_malloc(buffer_size, format, args);
    va_end(args);
    return formatted;
}
|
||||
|
||||
/*
 * Format `format` / `argv` into a newly allocated buffer of `buffer_size` bytes.
 *
 * Uses the standard vsnprintf() instead of the optional Annex K vsprintf_s(),
 * and keeps the "all or nothing" contract: output that does not fit completely
 * (including the terminating NUL) is treated as a failure.
 *
 * @return the buffer (caller must free() it), or NULL if the allocation
 *         failed, formatting failed, or the result would have been truncated
 */
char* NULLABLE(vsprintf_malloc)(size_t buffer_size, cstr format, va_list argv){
    char* buf = malloc(buffer_size);
    if(buf == NULL)
        return NULL;

    // vsnprintf returns the length the full output needs, so a value
    // >= buffer_size means the text was cut short
    int r = vsnprintf(buf, buffer_size, format, argv);
    if(r < 0 || (size_t)r >= buffer_size){
        free(buf);
        return NULL;
    }
    return buf;
}
|
||||
121
src/main.c
121
src/main.c
@ -1,17 +1,34 @@
|
||||
#include "VM/VM.h"
|
||||
#include "instructions/instructions.h"
|
||||
#include "collections/List.h"
|
||||
|
||||
// Declare List_cstr, its inline constructors and the List_cstr_push() prototype.
List_declare(cstr);
|
||||
// Emit the List_cstr_push() definition in this translation unit.
List_define(cstr);
|
||||
|
||||
/* True when the current command-line argument argv[argi] is exactly STR. */
#define arg_is(STR) (!strcmp(argv[argi], (STR)))
|
||||
|
||||
i32 main(const i32 argc, const char** argv){
|
||||
const char* NULLABLE(filename) = NULL;
|
||||
i32 compileSources(cstr out_file, List_cstr* sources);
|
||||
i32 bootFromImage(cstr image_file);
|
||||
|
||||
i32 main(const i32 argc, cstr* argv){
|
||||
if(argc < 2){
|
||||
printfe("ERROR: no arguments provided. Use --help to know more.\n");
|
||||
return 1;
|
||||
}
|
||||
|
||||
bool boot = false;
|
||||
cstr NULLABLE(image_file) = NULL;
|
||||
bool compile = false;
|
||||
cstr NULLABLE(out_file) = NULL;
|
||||
List_cstr NULLABLE(sources) = List_cstr_construct(NULL, 0, 0);
|
||||
|
||||
for(i32 argi = 1; argi < argc; argi++){
|
||||
if(arg_is("-h") || arg_is("--help")){
|
||||
printf(
|
||||
"-h, --help Show this message\n"
|
||||
"-op, --opcodes Shows list of all instructions.\n"
|
||||
"-i, --image [FILE] Boot VM using image file\n"
|
||||
"-h, --help Show this message.\n"
|
||||
"-op, --opcodes Show list of all instructions.\n"
|
||||
"-i, --image [FILE] Boot VM using image file.\n"
|
||||
"-c, --compile [OUT_FILE] [SOURCES...] Compile assembly source files to machine code.\n"
|
||||
);
|
||||
return 0;
|
||||
}
|
||||
@ -25,11 +42,32 @@ i32 main(const i32 argc, const char** argv){
|
||||
return 0;
|
||||
}
|
||||
else if(arg_is("-i") || arg_is("--image")){
|
||||
if(boot){
|
||||
printfe("--image flag is set already\n");
|
||||
return 1;
|
||||
}
|
||||
if(++argi >= argc){
|
||||
printfe("ERROR: no image file specified\n");
|
||||
return 1;
|
||||
}
|
||||
filename = argv[argi];
|
||||
image_file = argv[argi];
|
||||
boot = true;
|
||||
}
|
||||
else if(arg_is("-c") || arg_is("--compile")){
|
||||
if(compile){
|
||||
printfe("--compile flag is set already\n");
|
||||
return 1;
|
||||
}
|
||||
if(++argi >= argc){
|
||||
printfe("ERROR: no output file file specified\n");
|
||||
return 1;
|
||||
}
|
||||
out_file = argv[argi];
|
||||
sources = List_cstr_alloc(argc);
|
||||
compile = true;
|
||||
}
|
||||
else if(compile){
|
||||
List_cstr_push(&sources, argv[argi]);
|
||||
}
|
||||
else {
|
||||
printfe("ERROR: unknown argument '%s'\n", argv[argi]);
|
||||
@ -37,14 +75,21 @@ i32 main(const i32 argc, const char** argv){
|
||||
}
|
||||
}
|
||||
|
||||
if(filename == NULL){
|
||||
printfe("ERROR: no arguments provided. Use --help to know more.\n");
|
||||
return 1;
|
||||
i32 exit_code = 0;
|
||||
if(compile){
|
||||
exit_code = compileSources(out_file, &sources);
|
||||
}
|
||||
if(exit_code == 0 && boot){
|
||||
exit_code = bootFromImage(image_file);
|
||||
}
|
||||
|
||||
FILE* file = fopen(filename, "rb");
|
||||
return exit_code;
|
||||
}
|
||||
|
||||
i32 bootFromImage(cstr image_file){
|
||||
FILE* file = fopen(image_file, "rb");
|
||||
if(file == NULL){
|
||||
printfe("ERROR: can't open file '%s'\n", filename);
|
||||
printfe("ERROR: can't open file '%s'\n", image_file);
|
||||
return 1;
|
||||
}
|
||||
|
||||
@ -55,7 +100,7 @@ i32 main(const i32 argc, const char** argv){
|
||||
size_t bytes_read = fread(vm_memory, 1, buffer_size, file);
|
||||
fclose(file);
|
||||
if(bytes_read == (size_t)EOF){
|
||||
printfe("ERROR: can't read file '%s'\n", filename);
|
||||
printfe("ERROR: can't read file '%s'\n", image_file);
|
||||
free(vm_memory);
|
||||
return 1;
|
||||
}
|
||||
@ -83,54 +128,6 @@ i32 main(const i32 argc, const char** argv){
|
||||
return exit_code;
|
||||
}
|
||||
|
||||
|
||||
char* _strcat_malloc(size_t n, const char* str0, ...){
|
||||
va_list argv;
|
||||
va_start(argv, str0);
|
||||
char* heap_ptr = _vstrcat_malloc(n, str0, argv);
|
||||
va_end(argv);
|
||||
return heap_ptr;
|
||||
}
|
||||
|
||||
char* _vstrcat_malloc(size_t n, const char* str0, va_list argv){
|
||||
size_t str0_len = strlen(str0);
|
||||
size_t total_len = str0_len;
|
||||
const char** const parts = malloc(sizeof(const char*) * n);
|
||||
size_t* const part_lengths = malloc(sizeof(size_t) * n);
|
||||
for(size_t i = 0; i < n; i++){
|
||||
const char* part = va_arg(argv, const char*);
|
||||
size_t length = strlen(part);
|
||||
parts[i] = part;
|
||||
part_lengths[i] = length;
|
||||
total_len += length;
|
||||
}
|
||||
char* const buf = malloc(total_len + 1);
|
||||
memcpy(buf, str0, str0_len);
|
||||
char* walking_ptr = buf + str0_len;
|
||||
for(size_t i = 0; i < n; i++){
|
||||
memcpy(walking_ptr, parts[i], part_lengths[i]);
|
||||
walking_ptr += part_lengths[i];
|
||||
}
|
||||
buf[total_len] = '\0';
|
||||
free(parts);
|
||||
free(part_lengths);
|
||||
return buf;
|
||||
}
|
||||
|
||||
char* NULLABLE(sprintf_malloc)(size_t buffer_size, const char* format, ...){
|
||||
va_list argv;
|
||||
va_start(argv, format);
|
||||
char* NULLABLE(heap_ptr) = vsprintf_malloc(buffer_size, format, argv);
|
||||
va_end(argv);
|
||||
return heap_ptr;
|
||||
}
|
||||
|
||||
char* NULLABLE(vsprintf_malloc)(size_t buffer_size, const char* format, va_list argv){
|
||||
char* buf = malloc(buffer_size);
|
||||
int r = vsprintf_s(buf, buffer_size, format, argv);
|
||||
if(r < 0){
|
||||
free(buf);
|
||||
return NULL;
|
||||
}
|
||||
return buf;
|
||||
/* Compile the given source files into out_file. Not implemented yet: always returns -1. */
i32 compileSources(cstr out_file, List_cstr* sources){
    (void)out_file;
    (void)sources;
    return -1;
}
|
||||
|
||||
@ -24,6 +24,8 @@ typedef u8 bool;
|
||||
#define true 1
|
||||
#define false 0
|
||||
|
||||
// Shorthand for a pointer to read-only characters.
typedef const char* cstr;
|
||||
|
||||
#define __count_args( \
|
||||
a0, a1, a2, a3, a4, a5, a6, a7 , a8, a9, a10,a11,a12,a13,a14,a15, \
|
||||
a16,a17,a18,a19,a20,a21,a22,a23, a24,a25,a26,a27,a28,a29,a30,a31, \
|
||||
@ -49,3 +51,7 @@ char* _vstrcat_malloc(size_t n, const char* str0, va_list argv);
|
||||
|
||||
char* NULLABLE(sprintf_malloc)(size_t buffer_size, const char* format, ...) __attribute__((__format__(__printf__, 2, 3)));
|
||||
char* NULLABLE(vsprintf_malloc)(size_t buffer_size, const char* format, va_list argv);
|
||||
|
||||
/* True only for the lowercase letters 'a'..'z'; the previous `c - 'a' <= 'z'` held for nearly every character. */
static inline bool isAlphabeticalL(char c) { return c >= 'a' && c <= 'z'; }
|
||||
/* True only for the uppercase letters 'A'..'Z'; the previous `c - 'A' <= 'Z'` held for nearly every character. */
static inline bool isAlphabeticalR(char c) { return c >= 'A' && c <= 'Z'; }
|
||||
/* True only for the decimal digits '0'..'9'; the previous `c - '0' <= '9'` held for nearly every character. */
static inline bool isDigit(char c) { return c >= '0' && c <= '9'; }
|
||||
|
||||
Loading…
Reference in New Issue
Block a user