Compare commits

..

25 Commits

Author SHA1 Message Date
715a2cd82e temporarely disabled variable size reading 2025-02-05 17:26:01 +05:00
3fd45311c5 constant data names linking 2025-02-05 17:25:12 +05:00
b1b20d336d compile binary 2025-02-05 12:08:40 +05:00
53ca7e1b49 writeBinaryFile 2025-02-04 13:31:19 +05:00
fe6d690251 registers.h 2025-02-04 10:45:37 +05:00
51ef24bb53 fixed memory issues 2025-02-03 22:30:56 +05:00
422d967165 toUpper, toLower 2025-02-03 22:30:43 +05:00
5d275c8dd1 added compatibility with linux 2025-02-03 22:30:13 +05:00
2faff91981 Parser_parseOperation 2025-02-03 21:14:02 +05:00
b443367f46 HashMap 2025-02-03 12:30:01 +05:00
fd9e5dda78 List_tryRemoveAt 2025-02-03 12:29:31 +05:00
823223ffa7 cstr -> str where possible 2025-02-02 19:18:01 +05:00
8a38813ba5 StringBuilder_alloc 2025-02-02 19:16:29 +05:00
7c2809aae2 StringBuilder_removeFromEnd 2025-02-02 17:59:50 +05:00
9e9b43afb4 str_seek 2025-02-02 17:59:30 +05:00
2831474f79 str and StringBuilder 2025-02-02 00:48:34 +05:00
2d3e66dd38 rename fix 2025-01-28 15:18:20 +03:00
7f606dfaff List growth factor and padding 2025-01-27 09:42:34 +05:00
46e5eb1887 parseDataDefinition 2025-01-27 09:40:49 +05:00
e43a987e1e renamed files 2025-01-21 08:15:58 +05:00
83e28c9022 started working on parser 2025-01-21 01:17:30 +05:00
5c9197436f example 2025-01-20 22:52:44 +05:00
f710aa4199 compiler 2025-01-20 22:52:23 +05:00
facacc90f8 argument parsing 2025-01-20 22:42:32 +05:00
dbe8569a3b parser 2025-01-18 13:56:46 +05:00
37 changed files with 1850 additions and 614 deletions

2
.vscode/launch.json vendored
View File

@@ -7,7 +7,7 @@
"request": "launch", "request": "launch",
"program": "${workspaceFolder}/bin/tcpu", "program": "${workspaceFolder}/bin/tcpu",
"windows": { "program": "${workspaceFolder}/bin/tcpu.exe" }, "windows": { "program": "${workspaceFolder}/bin/tcpu.exe" },
"args": [ "-c", "s.tasm", "o.bin" ], "args": [ "-c", "../examples/s.tasm", "o.bin", "--debug" ],
"cwd": "${workspaceFolder}/bin", "cwd": "${workspaceFolder}/bin",
"preLaunchTask": "build_exec_dbg", "preLaunchTask": "build_exec_dbg",
"stopAtEntry": false, "stopAtEntry": false,

16
examples/s.tasm Normal file
View File

@@ -0,0 +1,16 @@
/*
"hello world" program in my assembly language
*/
.data:
// named array of 8-bit values
const8 msg "Hello, World :3\0"
.main:
// load the registers used by the call below
push ax 1; // sys_write
push bx 1; // stdout
push cx @msg; // address of msg data
push dx #msg; // size of msg data
// NOTE(review): presumably performs the call selected by ax - confirm against the VM
sys
// NOTE(review): presumably ax is the exit code consumed by exit - confirm
push ax 0
exit

View File

@@ -49,7 +49,7 @@ i32 VM_boot(VM* vm){
while (vm->current_pos < vm->data_size){ while (vm->current_pos < vm->data_size){
u8 opcode = vm->data[vm->current_pos]; u8 opcode = vm->data[vm->current_pos];
const Instruction* instr = Instruction_getFromOpcode(opcode); const Instruction* instr = Instruction_getByOpcode(opcode);
// printfe("[at 0x%x] %02X %s\n", (u32)vm->current_pos, opcode, instr->name); // printfe("[at 0x%x] %02X %s\n", (u32)vm->current_pos, opcode, instr->name);
if(instr == NULL){ if(instr == NULL){
VM_setError(vm, "unknown opcode %02X", opcode); VM_setError(vm, "unknown opcode %02X", opcode);
@@ -77,7 +77,9 @@ i32 VM_boot(VM* vm){
bool VM_dataRead(VM* vm, void* dst, size_t pos, size_t size){ bool VM_dataRead(VM* vm, void* dst, size_t pos, size_t size){
if(pos + size >= vm->data_size){ if(pos + size >= vm->data_size){
VM_setError(vm, "can't read %lli bytes from 0x%x, because only %lli are avaliable", VM_setError(vm,
"can't read " IFWIN("%lli", "%li") " bytes from 0x%x, because only "
IFWIN("%lli", "%li") " are avaliable",
size, (u32)pos, vm->data_size - size); size, (u32)pos, vm->data_size - size);
return false; return false;
} }

View File

@@ -1,5 +1,6 @@
#pragma once #pragma once
#include "../std.h" #include "../std.h"
#include "../string/str.h"
typedef union Register { typedef union Register {
u32 u32v; u32 u32v;

View File

@@ -1,20 +1,24 @@
#pragma once #pragma once
#include "../std.h" #include "../std.h"
#define Array_construct(T, DATA, LEN) ((Array_##T){ .data = DATA, .len = LEN })
/// creates Array_##T from a const array
#define ARRAY(T, A...) Array_construct(T, ((T[])A), ARRAY_SIZE(((T[])A)))
#define Array_declare(T)\ #define Array_declare(T)\
typedef struct Array_##T {\ typedef struct Array_##T {\
T* data;\ T* data;\
u32 len;\ u32 len;\
} Array_##T;\ } Array_##T;\
\ \
static inline Array_##T Array_##T##_construct(T* data_ptr, u32 len) {\ static inline Array_##T Array_##T##_alloc(u32 len){\
return (Array_##T){ .data = data_ptr, .len = len };\ return Array_construct(T, (T*)malloc(len * sizeof(T)), len);\
}\ }\
\ static inline void Array_##T##_realloc(Array_##T* ptr, u32 new_len){\
static inline Array_##T Array_##T##_alloc(u32 len){\ ptr->data = (T*)realloc(ptr->data, new_len * sizeof(T));\
return Array_##T##_construct(malloc(len * sizeof(T)), len);\
}\
static inline void Array_##T##_realloc(Array_##T* ptr, u32 new_len){\
ptr->data = realloc(ptr->data, new_len * sizeof(T));\
ptr->len = new_len;\ ptr->len = new_len;\
} }
Array_declare(u8)
Array_declare(u32)

140
src/collections/HashMap.h Normal file
View File

@@ -0,0 +1,140 @@
#pragma once
#include "../std.h"
#include "../string/str.h"
#include "Array.h"
#include "List.h"
//TODO: sorting of bucket and binary search
//TODO: delayed deletion
#define __HashMap_HASH_FUNC str_hash32
#define __HashMapBucket_MAX_LEN 16
#define HashMap_DESTROY_VALUE_FUNC_NULL ((void (*)(void*))NULL)
/// call this in a header file
///@param T Value type
#define HashMap_declare(T)\
typedef struct KeyValue_##T {\
str key;\
T value;\
u32 hash;\
} KeyValue_##T;\
\
List_declare(KeyValue_##T);\
\
typedef struct HashMapBucket_##T {\
List_KeyValue_##T kvs;\
} HashMapBucket_##T;\
\
typedef struct HashMap_##T {\
HashMapBucket_##T* table;\
u32 height;\
u16 height_n;\
} HashMap_##T;\
\
void HashMap_##T##_alloc(HashMap_##T* ptr);\
void HashMap_##T##_free(HashMap_##T* ptr);\
T* NULLABLE(HashMap_##T##_tryGetPtr)(HashMap_##T* ptr, str key);\
bool HashMap_##T##_tryPush(HashMap_##T* ptr, str key, T value);\
bool HashMap_##T##_tryDelete(HashMap_##T* ptr, str key);\
/// call this in a source code file
/// Generates the bodies of the HashMap_##T functions declared by HashMap_declare(T).
/// Keys are copied with str_copy() on insertion; the copies are owned by the map
/// and released by HashMap_##T##_free and HashMap_##T##_tryDelete.
/// tryPush returns false if the key already exists or the table can't grow further.
///@param T Value type
///@param DESTROY_VALUE_FUNC `void foo (T*)` or HashMap_DESTROY_VALUE_FUNC_NULL
#define HashMap_define(T, DESTROY_VALUE_FUNC)\
List_define(KeyValue_##T);\
\
/* allowed bucket counts; HashMap_##T.height_n is an index into this table */\
static const Array_u32 __HashMap_##T##_heights = ARRAY(u32, {\
    17, 31, 61, 127, 257, 521, 1021, 2053, 4099, 8191, 16381, 32771,\
    65521, 131071, 262147, 524287, 1048583, 2097169, 4194319,\
    8388617, 16777213, 33554467, 67108859, 134217757, 268435493\
});\
\
void HashMap_##T##_alloc(HashMap_##T* ptr){\
    ptr->height_n = 0;\
    ptr->height = __HashMap_##T##_heights.data[0];\
    ptr->table = (HashMapBucket_##T*)malloc(ptr->height * sizeof(HashMapBucket_##T));\
    /* zeroed buckets are valid empty lists (data == NULL, len == 0) */\
    memset(ptr->table, 0, ptr->height * sizeof(HashMapBucket_##T));\
}\
\
void HashMap_##T##_free(HashMap_##T* ptr){\
    for(u32 i = 0; i < ptr->height; i++){\
        for(u32 j = 0; j < ptr->table[i].kvs.len; j++){\
            KeyValue_##T* kv_ptr = &ptr->table[i].kvs.data[j];\
            if(DESTROY_VALUE_FUNC){\
                DESTROY_VALUE_FUNC(&kv_ptr->value);\
            }\
            free(kv_ptr->key.data);\
        }\
        \
        free(ptr->table[i].kvs.data);\
    }\
    \
    free(ptr->table);\
}\
\
T* NULLABLE(HashMap_##T##_tryGetPtr)(HashMap_##T* ptr, str key){\
    u32 hash = __HashMap_HASH_FUNC(key);\
    HashMapBucket_##T* bu = &ptr->table[hash % ptr->height];\
    for(u32 i = 0; i < bu->kvs.len; i++){\
        /* compare the stored hash first, the string only on a hash match */\
        if(bu->kvs.data[i].hash == hash && str_equals(bu->kvs.data[i].key, key)){\
            return &bu->kvs.data[i].value;\
        }\
    }\
    \
    return NULL;\
}\
\
bool HashMap_##T##_tryPush(HashMap_##T* ptr, str key, T value){\
    u32 hash = __HashMap_HASH_FUNC(key);\
    HashMapBucket_##T* bu = &ptr->table[hash % ptr->height];\
    for(u32 i = 0; i < bu->kvs.len; i++){\
        if(bu->kvs.data[i].hash == hash && str_equals(bu->kvs.data[i].key, key)){\
            return false;\
        }\
    }\
    \
    /* target bucket is full: move every entry into a bigger table */\
    if(bu->kvs.len >= __HashMapBucket_MAX_LEN){\
        u32 height_expanded_n = ptr->height_n + 1;\
        if(height_expanded_n >= __HashMap_##T##_heights.len){\
            printf("ERROR: HashMap_" #T " IS FULL\n");\
            return false;\
        }\
        \
        u32 height_expanded = __HashMap_##T##_heights.data[height_expanded_n];\
        HashMapBucket_##T* table_expanded = (HashMapBucket_##T*)malloc(height_expanded * sizeof(HashMapBucket_##T));\
        memset(table_expanded, 0, height_expanded * sizeof(HashMapBucket_##T));\
        /* iterate the OLD table, which has ptr->height buckets (not height_expanded) */\
        for(u32 i = 0; i < ptr->height; i++){\
            for(u32 j = 0; j < ptr->table[i].kvs.len; j++){\
                /* shallow copy: the key buffer moves to the new table with the entry */\
                KeyValue_##T kv = ptr->table[i].kvs.data[j];\
                List_KeyValue_##T##_push(&table_expanded[kv.hash % height_expanded].kvs, kv);\
            }\
            \
            free(ptr->table[i].kvs.data);\
        }\
        free(ptr->table);\
        ptr->table = table_expanded;\
        ptr->height = height_expanded;\
        ptr->height_n = height_expanded_n;\
        bu = &ptr->table[hash % ptr->height];\
    }\
    \
    KeyValue_##T kv = { .key = str_copy(key), .value = value, .hash = hash };\
    List_KeyValue_##T##_push(&bu->kvs, kv);\
    return true;\
}\
\
bool HashMap_##T##_tryDelete(HashMap_##T* ptr, str key){\
    u32 hash = __HashMap_HASH_FUNC(key);\
    HashMapBucket_##T* bu = &ptr->table[hash % ptr->height];\
    for(u32 i = 0; i < bu->kvs.len; i++){\
        if(bu->kvs.data[i].hash == hash && str_equals(bu->kvs.data[i].key, key)){\
            /* the key copy made in tryPush belongs to the map: release it */\
            /* NOTE(review): the value is not passed to DESTROY_VALUE_FUNC here - confirm intended ownership */\
            free(bu->kvs.data[i].key.data);\
            return List_KeyValue_##T##_tryRemoveAt(&bu->kvs, i);\
        }\
    }\
    \
    return false;\
}

View File

@@ -1,5 +1,4 @@
#include "List.h" #include "List.h"
List_define(cstr);
List_define(u32); List_define(u32);
List_define(u8); List_define(u8);

View File

@@ -1,6 +1,9 @@
#pragma once #pragma once
#include "../std.h" #include "../std.h"
// minimal max_len after initial (0)
#define __List_min_size 16
#define List_declare(T)\ #define List_declare(T)\
typedef struct List_##T {\ typedef struct List_##T {\
T* data;\ T* data;\
@@ -12,34 +15,61 @@
return (List_##T){ .data = data_ptr, .len = len, .max_len = max_len };\ return (List_##T){ .data = data_ptr, .len = len, .max_len = max_len };\
}\ }\
\ \
static inline List_##T List_##T##_alloc(u32 len){\ List_##T List_##T##_alloc(u32 initial_len);\
return List_##T##_construct(len > 0 ? malloc(len * sizeof(T)) : NULL, 0, 0);\
}\
\ \
void List_##T##_push(List_##T* ptr, T value); T* List_##T##_expand(List_##T* ptr, u32 count);\
void List_##T##_push(List_##T* ptr, T value);\
void List_##T##_pushMany(List_##T* ptr, T* values, u32 count);\
bool List_##T##_tryRemoveAt(List_##T* ptr, u32 i);\
#define List_define(T)\ #define List_define(T)\
void List_##T##_push(List_##T* ptr, T value){\ List_##T List_##T##_alloc(u32 initial_len){\
u32 max_len = ptr->max_len;\ if(initial_len == 0)\
if(ptr->len == max_len){\ return List_##T##_construct((T*)NULL, 0, 0);\
max_len = max_len * 1.5;\ u32 max_len = ALIGN_TO(initial_len, sizeof(void*)/sizeof(T));\
max_len += __List_padding_in_sizeof_T(T);\
/* branchless version of max(max_len, __List_min_size) */\ /* branchless version of max(max_len, __List_min_size) */\
max_len += (max_len < __List_min_size) * (__List_min_size - max_len);\ max_len += (max_len < __List_min_size) * (__List_min_size - max_len);\
ptr->data = realloc(ptr->data, max_len * sizeof(T));\ return List_##T##_construct((T*)malloc(max_len * sizeof(T)), 0, max_len);\
ptr->max_len = max_len;\ }\
\
T* List_##T##_expand(List_##T* ptr, u32 count){\
u32 occupied_len = ptr->len;\
u32 expanded_max_len = ptr->max_len;\
expanded_max_len += (expanded_max_len < __List_min_size) * (__List_min_size - expanded_max_len);\
ptr->len += count;\
while(ptr->len > expanded_max_len){\
expanded_max_len *= 2;\
}\
u32 alloc_size = expanded_max_len * sizeof(T);\
if(ptr->data == NULL)\
ptr->data = (T*)malloc(alloc_size);\
else ptr->data = (T*)realloc(ptr->data, alloc_size);\
ptr->max_len = expanded_max_len;\
return ptr->data + occupied_len;\
}\
\
void List_##T##_push(List_##T* ptr, T value){\
T* empty_cell_ptr = List_##T##_expand(ptr, 1);\
*empty_cell_ptr = value;\
}\
\
void List_##T##_pushMany(List_##T* ptr, T* values, u32 count){\
T* empty_cell_ptr = List_##T##_expand(ptr, count);\
memcpy(empty_cell_ptr, values, count * sizeof(T));\
}\
\
bool List_##T##_tryRemoveAt(List_##T* ptr, u32 i){\
if(ptr->len == 0 || i >= ptr->len)\
return false;\
\
ptr->len--;\
for(; i < ptr->len; i++){\
ptr->data[i] = ptr->data[i + 1];\
}\
return true;\
}\ }\
ptr->data[ptr->len++] = value;\
}
#define __List_min_size 16
// sizeof(T) == 1 - padding is 7 of sizeof(T)
// sizeof(T) == 2 - padding is 3 of sizeof(T)
// sizeof(T) == 4 - padding is 1 of sizeof(T)
#define __List_padding_in_sizeof_T(T) ((8 - sizeof(T) % 8) / sizeof(T) )
List_declare(cstr);
List_declare(u32); List_declare(u32);
List_declare(u8); List_declare(u8);

52
src/compiler/AST.c Normal file
View File

@@ -0,0 +1,52 @@
#include "AST.h"
// generate the List_<T> function bodies for the AST node types declared in AST.h
List_define(Argument);
List_define(Operation);
List_define(DataDefinition);
List_define(Section);
// display names, indexed by the numeric value of ArgumentType
static str _ArgumentType_str[] = {
    STR("Unset"),
    STR("Register"),
    STR("ConstValue"),
    STR("VarDataName"),
    STR("ConstDataPointer"),
    STR("ConstDataSize"),
};

/// @return display name of `t`, or an error marker string
///         when `t` does not index the name table
str ArgumentType_toString(ArgumentType t){
    if(t < ARRAY_SIZE(_ArgumentType_str)){
        return _ArgumentType_str[t];
    }
    return STR("!!ArgumentType INDEX_ERROR!!");
}
/// Sets the section name and allocates empty data and code lists.
/// The name is stored as passed, it is not copied.
void Section_init(Section* sec, str name){
    const u32 data_capacity = 256;
    const u32 code_capacity = 1024;

    sec->code = List_Operation_alloc(code_capacity);
    sec->data = List_DataDefinition_alloc(data_capacity);
    sec->name = name;
}
/// Frees the byte buffer of every data definition, the argument list of every
/// operation, and both lists themselves. The Section struct is not freed.
void Section_free(Section* sec){
    u32 def_i = 0;
    while(def_i < sec->data.len){
        DataDefinition* def = &sec->data.data[def_i];
        free(def->data.data);
        def_i++;
    }
    free(sec->data.data);

    u32 op_i = 0;
    while(op_i < sec->code.len){
        Operation* op = &sec->code.data[op_i];
        free(op->args.data);
        op_i++;
    }
    free(sec->code.data);
}
/// Allocates an empty section list for the tree.
void AST_init(AST* ast){
    const u32 sections_capacity = 32;
    ast->sections = List_Section_alloc(sections_capacity);
}
/// Releases every section and then the section list storage.
void AST_free(AST* ast){
    Section* sections = ast->sections.data;
    for(u32 sec_i = 0; sec_i < ast->sections.len; sec_i++){
        Section_free(sections + sec_i);
    }
    free(sections);
}

65
src/compiler/AST.h Normal file
View File

@@ -0,0 +1,65 @@
#pragma once
#include "../std.h"
#include "../string/str.h"
#include "../instructions/instructions.h"
#include "../instructions/registers.h"
#include "../collections/List.h"
/// Kind of value stored in an Argument.
typedef enum ArgumentType {
ArgumentType_Unset, // rejected by compileSection ("ArgumentType is not set")
ArgumentType_Register, // Argument.value.register_code is used
ArgumentType_ConstValue, // Argument.value.i is used
ArgumentType_VarDataName, // Argument.value.data_name is used; compileSection reports it as not supported yet
ArgumentType_ConstDataPointer, // Argument.value.data_name is used; shown as @name in the debug log
ArgumentType_ConstDataSize, // Argument.value.data_name is used; shown as #name in the debug log
} ArgumentType;
/// @return display name of `t` (an error marker string if `t` is out of range)
str ArgumentType_toString(ArgumentType t);
/// One argument of an Operation. `type` selects which member of `value` is meaningful.
typedef struct Argument {
ArgumentType type;
union {
i64 i; // ArgumentType_ConstValue
f64 f; // NOTE(review): no reader of this member is visible here - confirm usage
str data_name; // VarDataName, ConstDataPointer and ConstDataSize
RegisterCode register_code; // ArgumentType_Register
} value;
} Argument;
List_declare(Argument);
/// One parsed instruction: an opcode and its argument list.
/// Section_free releases `args.data` of every operation in a section.
typedef struct Operation {
List_Argument args;
Opcode opcode;
} Operation;
List_declare(Operation);
/// A named block of constant data inside a section.
typedef struct DataDefinition {
str name;
List_u8 data; // raw bytes; freed by Section_free
u32 element_size; // size of one element in bytes (the debug log prints it as const<element_size*8>)
} DataDefinition;
List_declare(DataDefinition);
/// A named section of the source: its data definitions and its operations.
typedef struct Section {
str name;
List_DataDefinition data;
List_Operation code;
} Section;
List_declare(Section);
/// Stores `name` (not copied) and allocates empty `data` and `code` lists.
void Section_init(Section* Section, str name);
/// Frees the contents of `data` and `code`; does not free the Section struct itself.
void Section_free(Section* Section);
/// Parse result: the list of sections found in the source.
typedef struct AST {
List_Section sections;
} AST;
/// Allocates an empty section list.
void AST_init(AST* ast);
/// Calls Section_free on every section, then frees the list storage.
void AST_free(AST* ast);

40
src/compiler/Binary.c Normal file
View File

@@ -0,0 +1,40 @@
#include "Binary.h"
// generate the list and hash map function bodies for the types declared in Binary.h;
// both maps are created without a value destructor
List_define(ConstDataProps);
HashMap_define(ConstDataProps, HashMap_DESTROY_VALUE_FUNC_NULL);
List_define(NamedRef);
List_define(CompiledSection);
HashMap_define(CompiledSectionPtr, HashMap_DESTROY_VALUE_FUNC_NULL);
/// Initializes `ptr` as an empty compiled section called `name`:
/// no successor, zero offset, empty bookkeeping lists and a fresh byte buffer.
void CompiledSection_construct(CompiledSection* ptr, str name){
    // the two bookkeeping lists start without any storage
    List_ConstDataProps empty_props = List_ConstDataProps_construct(NULL, 0, 0);
    List_NamedRef empty_refs = List_NamedRef_construct(NULL, 0, 0);

    ptr->bytes = List_u8_alloc(64);
    ptr->named_refs = empty_refs;
    ptr->const_data_props_list = empty_props;
    ptr->offset = 0;
    ptr->next = NULL;
    ptr->name = name;
}
/// Frees the three buffers owned by the section; the struct itself is not freed.
void CompiledSection_free(CompiledSection* ptr){
    void* owned_buffers[] = {
        ptr->const_data_props_list.data,
        ptr->named_refs.data,
        ptr->bytes.data,
    };
    for(u32 i = 0; i < sizeof(owned_buffers) / sizeof(owned_buffers[0]); i++){
        free(owned_buffers[i]);
    }
}
/// Initializes every field of `ptr`: an empty section list, two empty hash maps
/// and a zero total size.
void BinaryObject_construct(BinaryObject* ptr){
    ptr->section_list = List_CompiledSection_alloc(64);
    HashMap_CompiledSectionPtr_alloc(&ptr->section_map);
    HashMap_ConstDataProps_alloc(&ptr->const_data_map);
    // previously left uninitialized unless the caller zeroed the struct beforehand
    ptr->total_size = 0;
}
/// Releases every compiled section, the section list storage and both hash maps.
void BinaryObject_free(BinaryObject* ptr){
    CompiledSection* sections = ptr->section_list.data;
    u32 sec_i = 0;
    while(sec_i < ptr->section_list.len){
        CompiledSection_free(&sections[sec_i]);
        sec_i++;
    }
    free(sections);

    HashMap_CompiledSectionPtr_free(&ptr->section_map);
    HashMap_ConstDataProps_free(&ptr->const_data_map);
}

65
src/compiler/Binary.h Normal file
View File

@@ -0,0 +1,65 @@
#pragma once
#include "../std.h"
#include "../string/str.h"
#include "../instructions/instructions.h"
#include "../instructions/registers.h"
#include "../collections/List.h"
#include "../collections/HashMap.h"
#include "AST.h"
typedef struct CompiledSection CompiledSection;
/// Location of a named block of bytes in the compiled output.
/// Entries in CompiledSection.const_data_props_list are relative to their section;
/// compileBinary adds the section offset before storing a copy in BinaryObject.const_data_map.
typedef struct ConstDataProps {
str name;
u32 size; // size in bytes
u32 offset; // offset in bytes from section start
} ConstDataProps;
/// builds a ConstDataProps value from its three fields
#define ConstDataProps_construct(NAME, SIZE, OFFSET) ((ConstDataProps){ .name = NAME, .size = SIZE, .offset = OFFSET})
List_declare(ConstDataProps);
HashMap_declare(ConstDataProps);
/// What compileBinary writes into the placeholder of a NamedRef.
typedef enum NamedRefType {
NamedRefType_Unset,
NamedRefType_Ptr, // the offset of the referenced data
NamedRefType_Size, // the size of the referenced data
} NamedRefType;
/// A 4-byte placeholder in a section's bytes that is filled in by compileBinary
/// once the referenced named data has been located.
typedef struct NamedRef {
str name; // name of the referenced data
NamedRefType type;
u32 offset; // offset in bytes from section start
} NamedRef;
/// builds a NamedRef value from its three fields
#define NamedRef_construct(NAME, TYPE, OFFSET) ((NamedRef){ .name = NAME, .type = TYPE, .offset = OFFSET})
List_declare(NamedRef);
/// Machine code and data emitted for one source section.
typedef struct CompiledSection {
str name;
CompiledSection* next; // next section in output order; NULL until linked by compileBinary
u32 offset; // set by compileBinary from the preceding section's offset and length
List_ConstDataProps const_data_props_list; // data blocks placed in `bytes`
List_NamedRef named_refs; // placeholders in `bytes` waiting for a resolved value
List_u8 bytes; // emitted bytes: operations first, then data
} CompiledSection;
/// Initializes all fields; `name` is stored as passed, it is not copied.
void CompiledSection_construct(CompiledSection* ptr, str name);
/// Frees the three lists; does not free `ptr` itself.
void CompiledSection_free(CompiledSection* ptr);
List_declare(CompiledSection);
// pointer alias: gives HashMap_declare a single identifier to use as the value type
typedef CompiledSection* CompiledSectionPtr;
HashMap_declare(CompiledSectionPtr);
/// Everything produced by the compile stage.
typedef struct BinaryObject {
List_CompiledSection section_list; // storage of all compiled sections
HashMap_CompiledSectionPtr section_map; // section name -> pointer to an element of section_list
HashMap_ConstDataProps const_data_map; // data name -> size and offset
u32 total_size; // sum of bytes.len over all sections, set by compileBinary
} BinaryObject;
/// Allocates the section list and both maps.
void BinaryObject_construct(BinaryObject* ptr);
/// Frees every section, the list storage and both maps.
void BinaryObject_free(BinaryObject* ptr);

365
src/compiler/Compiler.c Normal file
View File

@@ -0,0 +1,365 @@
#include "Compiler_internal.h"
// generate the HashMap_SectionPtr function bodies; no value destructor is registered
HashMap_define(SectionPtr, HashMap_DESTROY_VALUE_FUNC_NULL);
/// Puts `cmp` into the initial state and allocates its token list,
/// line length list, AST and binary object.
void Compiler_init(Compiler* cmp){
    // zero first, so every field not assigned below starts as 0 / NULL
    memset(cmp, 0, sizeof(*cmp));
    cmp->state = CompilerState_Initial;

    const u32 tokens_capacity = 4096;
    const u32 lines_capacity = 1024;
    cmp->tokens = List_Token_alloc(tokens_capacity);
    cmp->line_lengths = List_u32_alloc(lines_capacity);

    AST_init(&cmp->ast);
    BinaryObject_construct(&cmp->binary);
}
/// Frees the source buffer, the lexer lists, the AST and the binary object.
/// `cmp` itself and `cmp->error_message` are not freed here.
void Compiler_free(Compiler* cmp){
    // buffers owned directly by the compiler
    free(cmp->code.data);
    free(cmp->tokens.data);
    free(cmp->line_lengths.data);

    // nested structures release their own contents
    AST_free(&cmp->ast);
    BinaryObject_free(&cmp->binary);
}
/// Converts an index in the code buffer into a 1-based line and column,
/// using the line lengths recorded so far.
/// @return CodePos with line 0 and column 0 if `pos` is outside the code
///         or beyond the recorded lines
CodePos Compiler_getLineAndColumn(Compiler* cmp, u32 pos){
    if(pos >= cmp->code.len)
        return CodePos_create(0, 0);

    u32 line_begin = 0;
    u32 line_i = 0;
    while(line_i < cmp->line_lengths.len){
        u32 line_end = line_begin + cmp->line_lengths.data[line_i];
        line_i++;
        // line_i is already the 1-based number of the line that was just measured
        if(pos < line_end)
            return CodePos_create(line_i, pos + 1 - line_begin);
        line_begin = line_end;
    }

    return CodePos_create(0, 0);
}
/// Formats an error message prefixed with "[at line:column][context] ",
/// stores it in cmp->error_message and switches cmp->state to CompilerState_Error.
/// @param context name of the reporting function (the Compiler_setError macro passes __func__)
/// @param format printf-style format for the remaining arguments
void _Compiler_setError(Compiler* cmp, cstr context, cstr format, ...){
    // happens at the end of file
    // (when no code is loaded yet, len - 1 would wrap around, so clamp to 0 instead)
    if(cmp->pos >= cmp->code.len)
        cmp->pos = cmp->code.len > 0 ? cmp->code.len - 1 : 0;
    char position_str[32];
    CodePos code_pos = Compiler_getLineAndColumn(cmp, cmp->pos);
    snprintf(position_str, sizeof(position_str), "[at %u:%u][", code_pos.line, code_pos.column);
    char* real_format = strcat_malloc(position_str, context, "] ", format);
    va_list argv;
    va_start(argv, format);
    char* NULLABLE(buf) = vsprintf_malloc(512, real_format, argv);
    va_end(argv);
    free(real_format);
    if(buf == NULL){
        // formatting failed: fall back to a fixed message
        static const char fallback_message[] = "SPRINTF FAILED";
        buf = malloc(sizeof(fallback_message));
        if(buf != NULL)
            memcpy(buf, fallback_message, sizeof(fallback_message));
    }
    cmp->state = CompilerState_Error;
    cmp->error_message = buf;
}
/// Records an error on the local variable `cmp` without returning.
/// returnError() from Compiler_internal.h expands to this macro followed by `return false`.
#define setError(FORMAT, ...) {\
Compiler_setError(cmp, FORMAT, ##__VA_ARGS__);\
}
/// Builds a str covering the token's characters inside cmp->code.
/// The characters are referenced in place, nothing is copied.
str Compiler_constructTokenStr(Compiler* cmp, Token t){
    char* token_begin = (char*)(cmp->code.data + t.begin);
    return str_construct(token_begin, t.length, false);
}
/// Emits the bytes of one AST section into a new CompiledSection appended to
/// cmp->binary.section_list and registers it in cmp->binary.section_map.
/// Operations are emitted first, data definitions after them.
/// References to named data are emitted as 4 zero bytes and recorded in
/// named_refs, to be filled in by compileBinary.
/// @return false (with the error stored in cmp) on a duplicate section name
///         or an unsupported argument type
static bool compileSection(Compiler* cmp, Section* sec){
    // NOTE(review): cs points into section_list storage and is also stored in section_map;
    // it presumably becomes dangling if the list is ever reallocated - confirm
    CompiledSection* cs = List_CompiledSection_expand(&cmp->binary.section_list, 1);
    CompiledSection_construct(cs, sec->name);
    if(!HashMap_CompiledSectionPtr_tryPush(&cmp->binary.section_map, cs->name, cs)){
        // "%s" needs a char*, so pass the data of a temporary copy and release it afterwards
        str name_copy = str_copy(sec->name);
        setError("duplicate section '%s'", name_copy.data);
        free(name_copy.data);
        return false;
    }
    // compile code
    u8 zeroes[8] = {0, 0, 0, 0, 0, 0, 0, 0};
    for(u32 i = 0; i < sec->code.len; i++){
        Operation* op = &sec->code.data[i];
        List_u8_pushMany(&cs->bytes, (void*)&op->opcode, sizeof(op->opcode));
        for(u32 j = 0; j < op->args.len; j++){
            Argument* arg = &op->args.data[j];
            switch(arg->type){
                case ArgumentType_VarDataName:
                    returnError("argument type 'VarDataName' is not supported yet");
                case ArgumentType_Unset:
                    returnError("ArgumentType is not set");
                default:
                    returnError("invalid ArgumentType %i", arg->type);
                case ArgumentType_Register:
                    List_u8_push(&cs->bytes, arg->value.register_code);
                    break;
                case ArgumentType_ConstValue:
                    //TODO: add const value size parsing
                    List_u8_pushMany(&cs->bytes, (void*)&arg->value.i, 4);
                    break;
                case ArgumentType_ConstDataPointer:
                    // remember where the placeholder is, then reserve 4 bytes for it
                    List_NamedRef_push(&cs->named_refs, NamedRef_construct(
                        arg->value.data_name,
                        NamedRefType_Ptr,
                        cs->bytes.len));
                    List_u8_pushMany(&cs->bytes, zeroes, 4);
                    break;
                case ArgumentType_ConstDataSize:
                    List_NamedRef_push(&cs->named_refs, NamedRef_construct(
                        arg->value.data_name,
                        NamedRefType_Size,
                        cs->bytes.len));
                    List_u8_pushMany(&cs->bytes, zeroes, 4);
                    break;
            }
        }
    }
    // compile data
    for(u32 i = 0; i < sec->data.len; i++){
        DataDefinition* dd = &sec->data.data[i];
        List_ConstDataProps_push(&cs->const_data_props_list, ConstDataProps_construct(dd->name, dd->data.len, cs->bytes.len));
        List_u8_pushMany(&cs->bytes, dd->data.data, dd->data.len);
    }
    // TODO: push padding
    return true;
}
/// Compiles every AST section, chains the compiled sections starting at 'main',
/// registers named data in cmp->binary.const_data_map and fills in all recorded
/// named references.
/// @return false (with the error stored in cmp) if a section fails to compile,
///         'main' is missing, a data name is duplicated or a reference can't be resolved
static bool compileBinary(Compiler* cmp){
    for(u32 i = 0; i < cmp->ast.sections.len; i++){
        SectionPtr sec = &cmp->ast.sections.data[i];
        if(!compileSection(cmp, sec)){
            return false;
        }
    }
    // find main section
    str main_sec_name = STR("main");
    CompiledSection** main_sec_ptrptr = HashMap_CompiledSectionPtr_tryGetPtr(&cmp->binary.section_map, main_sec_name);
    if(main_sec_ptrptr == NULL){
        returnError("no 'main' section was defined");
    }
    // create linked list of CompiledSection where main is the first
    // NOTE(review): data defined inside 'main' is never added to const_data_map
    // because of the `continue` below - confirm whether that is intended
    CompiledSection* prev_sec = *main_sec_ptrptr;
    u32 total_size = 0;
    for(u32 i = 0; i < cmp->binary.section_list.len; i++){
        CompiledSection* sec = &cmp->binary.section_list.data[i];
        total_size += sec->bytes.len;
        if(str_equals(sec->name, main_sec_name))
            continue;
        prev_sec->next = sec;
        sec->offset = prev_sec->offset + prev_sec->bytes.len;
        // the section itself can be referenced by name like a data block
        ConstDataProps cd = ConstDataProps_construct(sec->name, sec->bytes.len, sec->offset);
        if(!HashMap_ConstDataProps_tryPush(&cmp->binary.const_data_map, cd.name, cd)){
            str name_copy = str_copy(cd.name);
            setError("duplicate named data '%s'", name_copy.data);
            free(name_copy.data);
            return false;
        }
        for(u32 j = 0; j < sec->const_data_props_list.len; j++){
            cd = sec->const_data_props_list.data[j];
            // section-relative offset -> offset from the start of the binary
            cd.offset += sec->offset;
            if(!HashMap_ConstDataProps_tryPush(&cmp->binary.const_data_map, cd.name, cd)){
                str name_copy = str_copy(cd.name);
                setError("duplicate named data '%s'", name_copy.data);
                free(name_copy.data);
                return false;
            }
        }
        // advance the tail, otherwise every section would be linked directly after main
        prev_sec = sec;
    }
    // insert calculated offsets into sections
    for(u32 i = 0; i < cmp->binary.section_list.len; i++){
        CompiledSection* sec = &cmp->binary.section_list.data[i];
        for(u32 j = 0; j < sec->named_refs.len; j++){
            NamedRef* ref = &sec->named_refs.data[j];
            ConstDataProps* target_data = HashMap_ConstDataProps_tryGetPtr(
                &cmp->binary.const_data_map, ref->name);
            if(target_data == NULL){
                str name_copy = str_copy(ref->name);
                setError("can't find named data '%s'", name_copy.data);
                free(name_copy.data);
                return false;
            }
            u32 ref_value;
            switch(ref->type){
                default:
                    returnError("invalid NamedRefType %i", ref->type);
                case NamedRefType_Size:
                    ref_value = target_data->size;
                    break;
                case NamedRefType_Ptr:
                    ref_value = target_data->offset;
                    break;
            }
            // the placeholder sits at an arbitrary byte offset, so copy instead of
            // storing through a u32 pointer
            memcpy(sec->bytes.data + ref->offset, &ref_value, sizeof(ref_value));
        }
    }
    cmp->binary.total_size = total_size;
    return true;
}
/// Runs the compile stage and writes the bytes of every linked section to `f`,
/// starting with 'main' and following the `next` links.
/// Requires cmp->state to be CompilerState_Parsing on entry.
/// @return false (with the error stored in cmp) on a compile or write failure
static bool writeBinaryFile(Compiler* cmp, FILE* f){
    returnErrorIf_auto(cmp->state != CompilerState_Parsing);
    cmp->state = CompilerState_Compiling;
    if(!compileBinary(cmp)){
        return false;
    }
    CompiledSection** main_sec_ptrptr = HashMap_CompiledSectionPtr_tryGetPtr(&cmp->binary.section_map, STR("main"));
    if(main_sec_ptrptr == NULL){
        returnError("no 'main' section was defined");
    }
    for(CompiledSection* sec = *main_sec_ptrptr; sec != NULL; sec = sec->next){
        // fwrite returns the number of items written; fewer than requested means failure
        if(fwrite(sec->bytes.data, 1, sec->bytes.len, f) != sec->bytes.len){
            returnError("can't write compiled section to the output file");
        }
    }
    //TODO: print warnings for unused sections
    return true;
}
bool Compiler_compile(Compiler* cmp, cstr source_file_name, cstr out_file_name, bool debug_log){
FILE* f = fopen(source_file_name, "rb");
if(f == NULL)
returnError("ERROR: can't open file '%s'", source_file_name);
StringBuilder sb = StringBuilder_alloc(64 * 1024);
int ret;
while((ret = fgetc(f)) != EOF) {
StringBuilder_append_char(&sb, ret);
}
if(ferror(f)){
StringBuilder_free(&sb);
fclose(f);
returnError("can't read file '%s'", source_file_name);
}
fclose(f);
if(sb.buffer.len == 0){
StringBuilder_free(&sb);
returnError("soucre file is empty");
}
cmp->code = str_copy(StringBuilder_getStr(&sb));
StringBuilder_free(&sb);
f = fopen(out_file_name, "wb");
if(f == NULL){
returnError("ERROR: can't open file '%s'", out_file_name);
}
if(debug_log){
printf("===========================[%s]===========================\n", source_file_name);
fputs(cmp->code.data, stdout);
fputc('\n', stdout);
}
if(debug_log)
printf("===================================[lexing]===================================\n");
bool success = Compiler_lex(cmp);
if(debug_log){
printf("------------------------------------[lines]------------------------------------\n");
for(u32 i = 0; i < cmp->line_lengths.len; i++){
printf("[%u] length: %u\n", i+1, cmp->line_lengths.data[i]);
}
printf("------------------------------------[tokens]-----------------------------------\n");
for(u32 i = 0; i < cmp->tokens.len; i++){
Token t = cmp->tokens.data[i];
CodePos pos = Compiler_getLineAndColumn(cmp, t.begin);
char* tokstr = malloc(4096);
strncpy(tokstr, cmp->code.data + t.begin, t.length);
tokstr[t.length] = 0;
char* tokstr_stripped = tokstr;
while(*tokstr_stripped == '\r' || *tokstr_stripped == '\n'){
tokstr_stripped++;
}
printf("[l:%3u, c:%3u] %s '%s'\n",
pos.line, pos.column,
TokenType_toString(t.type).data, tokstr_stripped);
free(tokstr);
}
}
if(!success){
fclose(f);
return false;
}
if(debug_log)
printf("===================================[parsing]===================================\n");
success = Compiler_parse(cmp);
if (debug_log){
printf("-------------------------------------[AST]-------------------------------------\n");
for(u32 i = 0; i < cmp->ast.sections.len; i++){
Section* sec = &cmp->ast.sections.data[i];
str tmpstr = str_copy(sec->name);
printf("section '%s'\n", tmpstr.data);
free(tmpstr.data);
for(u32 j = 0; j < sec->data.len; j++){
DataDefinition* dd = &sec->data.data[j];
tmpstr = str_copy(dd->name);
printf(" const%u %s (len %u)\n", dd->element_size * 8, tmpstr.data, dd->data.len/dd->element_size);
free(tmpstr.data);
}
for(u32 j = 0; j < sec->code.len; j++){
Operation* op = &sec->code.data[j];
const Instruction* instr = Instruction_getByOpcode(op->opcode);
printf(" %s", instr->name.data);
for(u32 k = 0; k < op->args.len; k++){
Argument* arg = &op->args.data[k];
printf(" %s(", ArgumentType_toString(arg->type).data);
switch(arg->type){
default:
fclose(f);
returnError("invalid argument type %i", arg->type);
case ArgumentType_Register:
const char* register_names[] = {"null", "ax", "bx", "cx", "dx"};
printf("%s", register_names[arg->value.register_code]);
break;
case ArgumentType_ConstValue:
printf(IFWIN("%lli", "%li"), arg->value.i);
break;
case ArgumentType_ConstDataPointer:
tmpstr = str_copy(arg->value.data_name);
printf("@%s", tmpstr.data);
free(tmpstr.data);
break;
case ArgumentType_ConstDataSize:
tmpstr = str_copy(arg->value.data_name);
printf("#%s", tmpstr.data);
free(tmpstr.data);
break;
case ArgumentType_VarDataName:
tmpstr = str_copy(arg->value.data_name);
printf("%s", tmpstr.data);
free(tmpstr.data);
break;
}
printf(")");
}
printf("\n");
}
}
}
if(!success){
fclose(f);
return false;
}
if(debug_log)
printf("==================================[compiling]==================================\n");
success = writeBinaryFile(cmp, f);
fclose(f);
if(success){
cmp->state = CompilerState_Success;
}
return success;
}

View File

@@ -1,9 +1,10 @@
#pragma once #pragma once
#include "../std.h" #include "../std.h"
#include "../string/str.h"
#include "../collections/List.h" #include "../collections/List.h"
#include "token.h" #include "../collections/HashMap.h"
#include "Token.h"
List_declare(Token); #include "Binary.h"
typedef enum CompilerState { typedef enum CompilerState {
CompilerState_Initial, CompilerState_Initial,
@@ -14,15 +15,24 @@ typedef enum CompilerState {
CompilerState_Success CompilerState_Success
} CompilerState; } CompilerState;
typedef Section* SectionPtr;
HashMap_declare(SectionPtr);
typedef struct Compiler { typedef struct Compiler {
char* code; /* general fields */
u32 code_len; str code;
u32 column; // > 0 if code parsing started u32 column; // > 0 if code parsing started
u32 pos; u32 pos;
CompilerState state; CompilerState state;
NULLABLE(char* error_message); NULLABLE(char* error_message);
/* lexer fields */
List_Token tokens; List_Token tokens;
List_u32 line_lengths; List_u32 line_lengths;
/* parser fields */
AST ast;
u32 tok_i;
/* compiler fields */
BinaryObject binary;
} Compiler; } Compiler;
void Compiler_init(Compiler* cmp); void Compiler_init(Compiler* cmp);
@@ -30,18 +40,4 @@ void Compiler_free(Compiler* cmp);
/// @brief compile assembly language code to machine code /// @brief compile assembly language code to machine code
/// @return true if no errors, false if any error occured (check cmp->error_message) /// @return true if no errors, false if any error occured (check cmp->error_message)
bool Compiler_compileTasm(Compiler* cmp, cstr source_file_name, cstr out_file_name, bool debug); bool Compiler_compile(Compiler* cmp, cstr source_file_name, cstr out_file_name, bool debug);
#define Compiler_setError(cmp, format, ...) _Compiler_setError(cmp, __func__, format ,##__VA_ARGS__)
void _Compiler_setError(Compiler* cmp, cstr context, cstr format, ...) __attribute__((__format__(__printf__, 3, 4)));
typedef struct CodePos {
u32 line; // 0 on error
u32 column; // 0 on error
} CodePos;
#define CodePos_create(L, C) ((CodePos){ .line = L, .column = C })
/// @param pos index in code buffer
CodePos Compiler_getLineAndColumn(Compiler* cmp, u32 pos);

View File

@@ -0,0 +1,30 @@
#include "Compiler.h"
#include "../string/StringBuilder.h"
/// Stores a formatted error message in `cmp` and sets its state to CompilerState_Error.
/// Use the Compiler_setError macro, which passes the calling function name as `context`.
void _Compiler_setError(Compiler* cmp, cstr context, cstr format, ...) __attribute__((__format__(__printf__, 3, 4)));
#define Compiler_setError(cmp, format, ...) _Compiler_setError(cmp, __func__, format ,##__VA_ARGS__)
/// Reports an error and returns false from the enclosing function.
/// Expands to setError(), which every including source file defines for itself.
#define returnError(FORMAT, ...) {\
setError(FORMAT, ##__VA_ARGS__);\
return false;\
}
/// returnError() executed only when STATEMENT is true
#define returnErrorIf(STATEMENT, FORMAT, ...) if(STATEMENT) returnError(FORMAT, ##__VA_ARGS__)
/// returnErrorIf() that uses the text of STATEMENT as the message.
/// The text is passed as the format string, so it must not contain '%'.
#define returnErrorIf_auto(STATEMENT) returnErrorIf(STATEMENT, #STATEMENT)
/// Position in the source text as shown in error messages and the debug log.
typedef struct CodePos {
u32 line; // 0 on error
u32 column; // 0 on error
} CodePos;
/// builds a CodePos value from a line and a column
#define CodePos_create(L, C) ((CodePos){ .line = L, .column = C })
/// @param pos index in code buffer
/// @return line and column of `pos`, or 0:0 if it can't be located
CodePos Compiler_getLineAndColumn(Compiler* cmp, u32 pos);
/// @return str referencing the token's characters inside cmp->code (no copy is made)
str Compiler_constructTokenStr(Compiler* cmp, Token t);
/// Lexing stage. Compiler_compile treats a false result as failure.
bool Compiler_lex(Compiler* cmp);
/// Parsing stage. Compiler_compile treats a false result as failure.
bool Compiler_parse(Compiler* cmp);

269
src/compiler/Lexer.c Normal file
View File

@@ -0,0 +1,269 @@
#include "Compiler_internal.h"
/// Records a lexer error on the local variable `cmp`.
/// The unfinished line is pushed to cmp->line_lengths first, because
/// the error position is looked up from the recorded line lengths.
#define setError(FORMAT, ...) {\
completeLine(cmp);\
Compiler_setError(cmp, FORMAT, ##__VA_ARGS__);\
}
/// expands to a format string plus its argument, for use inside setError()
#define Error_unexpectedCharacter(C) "unexpected character '%c'", C
#define Error_endOfFile "unexpected end of file"
/// Records the length of the line that just ended and starts a new one.
static void completeLine(Compiler* cmp){
    u32 finished_line_len = cmp->column;
    cmp->column = 0;
    List_u32_push(&cmp->line_lengths, finished_line_len);
}
/// Reads a `//` comment up to the end of the line or file and pushes it as a token.
/// On entry cmp->pos is at the second '/'; the token starts one character earlier.
/// The line break itself is not consumed.
static void readCommentSingleLine(Compiler* cmp){
    Token tok = Token_construct(TokenType_SingleLineComment, cmp->pos - 1, 0);
    // step over the second '/'
    cmp->pos++;
    cmp->column++;
    // advance until a line break or the end of the code
    for(; cmp->pos < cmp->code.len; cmp->pos++, cmp->column++){
        char c = cmp->code.data[cmp->pos];
        if(c == '\r' || c == '\n')
            break;
    }
    // same token either way: everything from the first '/' up to the stop position
    tok.length = cmp->pos - tok.begin;
    List_Token_push(&cmp->tokens, tok);
}
/// Reads a multi-line comment up to its closing marker and pushes it as a token.
/// On entry cmp->pos is at the opening '*'; the token starts one character earlier.
/// Line breaks inside the comment are recorded through completeLine().
/// Sets an error if the file ends before the comment is closed.
static void readCommentMultiLine(Compiler* cmp){
    char c; // '*'
    Token tok = Token_construct(TokenType_MultiLineComment, cmp->pos - 1, 0);
    cmp->column++;
    cmp->pos++;
    while(cmp->pos < cmp->code.len){
        c = cmp->code.data[cmp->pos];
        // closing comment
        // The '*' of the opening marker is at tok.begin + 1 and must not be reused
        // as the '*' of the closing marker, so the earliest valid closing '/'
        // is at tok.begin + 3 (an empty comment).
        if(cmp->pos > tok.begin + 2 && c == '/' && cmp->code.data[cmp->pos - 1] == '*') {
            tok.length = cmp->pos - tok.begin + 1;
            List_Token_push(&cmp->tokens, tok);
            return;
        }
        if(c == '\n')
            completeLine(cmp);
        cmp->column++;
        cmp->pos++;
    }
    // end of file
    setError(Error_endOfFile);
}
/// Dispatches on the character after a '/' and reads the matching comment kind.
/// Called with cmp->pos on the '/'. Anything other than "//" or "/*" is an error.
static void readComment(Compiler* cmp){
    // a lone '/' as the last character cannot start a comment
    if(cmp->pos + 1 >= cmp->code.len){
        setError(Error_endOfFile);
        return;
    }
    char next = cmp->code.data[cmp->pos + 1];
    if(next == '\r' || next == '\n'){
        // Report the stray '/' itself. cmp->pos has not been advanced yet, so
        // stepping back would name the previous character and would underflow
        // when the '/' is the very first character of the code.
        setError(Error_unexpectedCharacter(cmp->code.data[cmp->pos]));
        return;
    }
    // move onto the second character of the comment opener
    cmp->pos++;
    cmp->column++;
    if(next == '/'){
        readCommentSingleLine(cmp);
    }
    else if(next == '*'){
        readCommentMultiLine(cmp);
    }
    else {
        setError(Error_unexpectedCharacter(next));
    }
}
/// Reads a label of the form `.name:` and pushes the name (without the dot)
/// as a TokenType_Label token. Called with cmp->pos on the '.'.
/// The name ends at ':', a line break or the end of the code and may contain
/// letters, digits, '_' and '.'. An empty name is reported as an error.
static void readLabel(Compiler* cmp){
    // skip the leading '.'
    cmp->pos++;
    cmp->column++;
    Token label = Token_construct(TokenType_Label, cmp->pos, 0);
    for(; cmp->pos < cmp->code.len; cmp->column++, cmp->pos++){
        char ch = cmp->code.data[cmp->pos];
        bool terminator = (ch == ':' || ch == '\r' || ch == '\n');
        if(terminator){
            label.length = cmp->pos - label.begin;
            if(label.length == 0){
                // nothing between '.' and the terminator: blame the '.'
                setError(Error_unexpectedCharacter(cmp->code.data[--cmp->pos]));
            }
            else {
                List_Token_push(&cmp->tokens, label);
            }
            // the caller handles the terminator character
            return;
        }
        bool name_char = isAlphabeticalLower(ch) || isAlphabeticalUpper(ch) || isDigit(ch)
            || ch == '_' || ch == '.';
        if(!name_char){
            setError(Error_unexpectedCharacter(ch));
            return;
        }
    }
    // the code ended while the name was still being read
    label.length = cmp->pos - label.begin;
    if(label.length == 0){
        setError(Error_endOfFile);
    }
    else {
        List_Token_push(&cmp->tokens, label);
    }
}
/// Splits the rest of an operation into argument tokens.
/// Called with cmp->pos on the blank that follows the instruction name.
/// Arguments are separated by spaces or tabs; the operation ends at a line
/// break, ';' or the end of the code (cmp->pos is left on that character).
/// The first character of an argument decides its token type:
/// '\'' Char, '"' String, '@' NamedDataPointer, '#' NamedDataSize, digit Number,
/// anything else Name. Quoted arguments may contain blanks and ';'.
static void readArguments(Compiler* cmp){
    Token tok = Token_construct(TokenType_Unset, cmp->pos, 0);
    char quot = '\0'; // quotation character of the literal being read, '\0' outside literals
    bool escaped = false; // true when the previous character was an unescaped backslash
    while(cmp->pos < cmp->code.len){
        char c = cmp->code.data[cmp->pos];
        // inside a quoted literal
        if(quot != '\0'){
            if(c == '\r' || c == '\n'){
                setError("line end reached but string hasn't been closed yet");
                return;
            }
            // Track escaping as a state instead of peeking at the previous
            // character, so that an escaped backslash followed by the closing
            // quote ("\\") terminates the literal.
            if(escaped)
                escaped = false;
            else if(c == '\\')
                escaped = true;
            else if(c == quot)
                quot = '\0';
        }
        // end of operation
        else if(c == '\r' || c == '\n' || c == ';'){
            tok.length = cmp->pos - tok.begin;
            if(tok.length > 0)
                List_Token_push(&cmp->tokens, tok);
            // line bookkeeping is done by the caller
            return;
        }
        // argument separator: finish the current token and start a new one
        else if(c == ' ' || c == '\t'){
            tok.length = cmp->pos - tok.begin;
            if(tok.length > 0)
                List_Token_push(&cmp->tokens, tok);
            tok = Token_construct(TokenType_Unset, cmp->pos + 1, 0);
        }
        // first character of an argument decides the token type
        else if(tok.type == TokenType_Unset){
            if(c == '\''){
                tok.type = TokenType_Char;
                quot = c;
            }
            else if(c == '"'){
                tok.type = TokenType_String;
                quot = c;
            }
            else if(c == '@')
                tok.type = TokenType_NamedDataPointer;
            else if(c == '#')
                tok.type = TokenType_NamedDataSize;
            else if(isDigit(c))
                tok.type = TokenType_Number;
            else tok.type = TokenType_Name;
        }
        cmp->column++;
        cmp->pos++;
    }
    // end of file inside a quoted literal: do not emit a half-open token
    if(quot != '\0'){
        setError(Error_endOfFile);
        return;
    }
    // end of file
    tok.length = cmp->pos - tok.begin;
    if(tok.length > 0)
        List_Token_push(&cmp->tokens, tok);
}
/// Reads an instruction name (letters and digits), then its arguments if a
/// blank follows, and finally pushes a TokenType_OperationEnd token.
/// Called with cmp->pos on the first letter of the name.
static void readInstruction(Compiler* cmp){
    Token instr = Token_construct(TokenType_Instruction, cmp->pos, 0);
    bool has_arguments = false;
    // the first character was already validated by the caller
    cmp->pos++;
    cmp->column++;
    for(; cmp->pos < cmp->code.len; cmp->column++, cmp->pos++){
        char ch = cmp->code.data[cmp->pos];
        // the operation ends right after the name
        if(ch == '\r' || ch == '\n' || ch == ';')
            break;
        // a blank separates the name from its arguments
        if(ch == ' ' || ch == '\t'){
            has_arguments = true;
            break;
        }
        if(!(isAlphabeticalLower(ch) || isAlphabeticalUpper(ch) || isDigit(ch))){
            setError(Error_unexpectedCharacter(ch));
            return;
        }
    }
    // also reached when the code ends in the middle of the name
    instr.length = cmp->pos - instr.begin;
    List_Token_push(&cmp->tokens, instr);
    if(has_arguments)
        readArguments(cmp);
    Token end = Token_construct(TokenType_OperationEnd, cmp->pos, 1);
    List_Token_push(&cmp->tokens, end);
}
/// Splits cmp->code into tokens (cmp->tokens) and records line lengths
/// (cmp->line_lengths). Must be called in CompilerState_Initial.
/// @return false when an error was set, true otherwise
bool Compiler_lex(Compiler* cmp){
    returnErrorIf_auto(cmp->state != CompilerState_Initial);
    cmp->state = CompilerState_Lexing;
    cmp->column = 1;
    while(cmp->pos < cmp->code.len){
        char c = cmp->code.data[cmp->pos];
        switch(c){
            // skip blank characters
            case ' ': case '\t': case '\r': case '\n':
                break;
            // try read comment
            case '/':
                readComment(cmp);
                break;
            // try read label
            case '.':
                readLabel(cmp);
                break;
            default:
                // try read instruction
                if(isAlphabeticalLower(c) || isAlphabeticalUpper(c))
                    readInstruction(cmp);
                else returnError(Error_unexpectedCharacter(c));
                break;
        }
        if(cmp->state == CompilerState_Error)
            return false;
        // The readers stop ON their terminating character, or at the end of the
        // code. Only look at that character when it actually exists.
        if(cmp->pos < cmp->code.len && cmp->code.data[cmp->pos] == '\n')
            completeLine(cmp);
        cmp->column++;
        cmp->pos++;
    }
    // the last line has no terminating '\n' to trigger completeLine
    completeLine(cmp);
    return true;
}

281
src/compiler/Parser.c Normal file
View File

@@ -0,0 +1,281 @@
#include "Compiler_internal.h"
// Parser flavour of setError (used directly and through returnError):
// points cmp->pos at the beginning of the current token before storing the error.
// Requires a variable named `cmp` in the calling scope.
#define setError(FORMAT, ...) {\
cmp->pos = cmp->tokens.data[cmp->tok_i].begin;\
Compiler_setError(cmp, FORMAT, ##__VA_ARGS__);\
}
// Reports token T as unexpected. The token text is copied with str_copy before
// it is printed with %s, and the copy is freed afterwards.
#define setError_unexpectedToken(T) {\
str tok_str = str_copy(Compiler_constructTokenStr(cmp, T));\
cmp->pos = T.begin;\
Compiler_setError(cmp, "unexpected token '%s'", tok_str.data);\
free(tok_str.data);\
}
// Reports the single character at offset I from the beginning of token T as unexpected.
#define setError_unexpectedTokenChar(T, I) {\
cmp->pos = T.begin + I;\
Compiler_setError(cmp, "unexpected token '%c'", cmp->code.data[cmp->pos]);\
}
// Same as setError_unexpectedToken, with a message for unknown instruction names.
#define setError_unexpectedInstruction(T) {\
str tok_str = str_copy(Compiler_constructTokenStr(cmp, T));\
cmp->pos = T.begin;\
Compiler_setError(cmp, "unexpected instruction '%s'", tok_str.data);\
free(tok_str.data);\
}
// Message for a token whose type was never assigned.
#define Error_TokenUnset "token of undefined type"
// Message for a `const<N>` definition whose N is not a supported width.
#define Error_BitSize "invalid size in bits"
/// Appends `count` bytes of the object at `value`, starting at byte offset
/// `startIndex`, to the list one byte at a time.
static void List_u8_pushBytes(List_u8* l, void* value, u32 startIndex, u32 count){
    const u8* bytes = value;
    const u32 end = startIndex + count;
    u32 at = startIndex;
    while(at < end){
        List_u8_push(l, bytes[at]);
        at++;
    }
}
/// Tells whether B is one of the supported variable widths in bits: 8, 16, 32 or 64.
static inline bool isVarSizeBits(u32 B) {
    switch(B){
        case 8:
        case 16:
        case 32:
        case 64:
            return true;
        default:
            return false;
    }
}
/// Produces a copy of `src` in which backslash escape sequences are replaced
/// by the characters they stand for.
/// Supported: \0 \n \r \t \e and the self-escapes \\ \' \".
/// @param src literal text WITHOUT its surrounding quotes
/// @return the resolved string obtained from StringBuilder_getStr, or str_null
///         after an error was set (unknown escape code or a trailing backslash)
static NULLABLE(str) resolveEscapeSequences(Compiler* cmp, str src){
    StringBuilder sb = StringBuilder_alloc(src.len);
    bool escaped = false;
    for(u32 i = 0; i < src.len; i++){
        char c = src.data[i];
        if(!escaped){
            if(c == '\\')
                escaped = true;
            else StringBuilder_append_char(&sb, c);
            continue;
        }
        // the escape applies to this character only
        escaped = false;
        char resolved;
        switch(c){
            case '0':
                resolved = '\0';
                break;
            case 'n':
                resolved = '\n';
                break;
            case 'r':
                resolved = '\r';
                break;
            case 't':
                resolved = '\t';
                break;
            case 'e':
                // ESC, written numerically because '\e' is a compiler extension
                resolved = '\x1b';
                break;
            case '\\':
            case '\'':
            case '"':
                resolved = c;
                break;
            default:
                // Both callers strip the opening quote from a local copy of the
                // token, so src[i] is at offset i + 1 from the stored token begin.
                setError_unexpectedTokenChar(cmp->tokens.data[cmp->tok_i], i + 1);
                StringBuilder_free(&sb);
                return str_null;
        }
        StringBuilder_append_char(&sb, resolved);
    }
    // a backslash with nothing after it
    if(escaped){
        setError_unexpectedTokenChar(cmp->tokens.data[cmp->tok_i], src.len);
        StringBuilder_free(&sb);
        return str_null;
    }
    return StringBuilder_getStr(&sb);
}
static void parseDataDefinition(Compiler* cmp, str instr_name, DataDefinition* ddf){
i32 _element_size_bits;
str _instr_name_zero_terminated = str_copy(instr_name);
if(sscanf(_instr_name_zero_terminated.data, "const%i", &_element_size_bits) != 1 || !isVarSizeBits(_element_size_bits)){
free(_instr_name_zero_terminated.data);
setError(Error_BitSize);
return;
}
free(_instr_name_zero_terminated.data);
ddf->element_size = _element_size_bits / 8;
ddf->data = List_u8_alloc(32);
Token tok = cmp->tokens.data[++cmp->tok_i];
if(tok.type != TokenType_Name){
setError_unexpectedToken(tok);
return;
}
str tok_str = Compiler_constructTokenStr(cmp, tok);
str processed_str = str_null;
ddf->name = tok_str;
while(++cmp->tok_i < cmp->tokens.len){
tok = cmp->tokens.data[cmp->tok_i];
switch(tok.type){
case TokenType_SingleLineComment:
case TokenType_MultiLineComment:
// skip comments
break;
case TokenType_OperationEnd:
return;
case TokenType_Unset:
setError(Error_TokenUnset);
return;
default:
setError_unexpectedToken(tok);
return;
case TokenType_Number:
tok_str = Compiler_constructTokenStr(cmp, tok);
processed_str = str_copy(tok_str);
if(str_seekChar(tok_str, '.', 0) != -1){
f64 f = atof(processed_str.data);
List_u8_pushBytes(&ddf->data, &f, 8 - ddf->element_size, ddf->element_size);
}
else {
i64 i = atoll(processed_str.data);
List_u8_pushBytes(&ddf->data, &i, 8 - ddf->element_size, ddf->element_size);
}
free(processed_str.data);
break;
case TokenType_Char:
tok.begin += 1;
tok.length -= 2;
tok_str = Compiler_constructTokenStr(cmp, tok);
processed_str = resolveEscapeSequences(cmp, tok_str);
if(processed_str.len != ddf->element_size){
setError("can't fit char of size %i in %u bit variable", processed_str.len, _element_size_bits);
return;
}
List_u8_pushBytes(&ddf->data, processed_str.data, 0, processed_str.len);
free(processed_str.data);
break;
case TokenType_String:
tok.begin += 1;
tok.length -= 2;
tok_str = Compiler_constructTokenStr(cmp, tok);
processed_str = resolveEscapeSequences(cmp, tok_str);
List_u8_pushBytes(&ddf->data, processed_str.data, 0, processed_str.len);
free(processed_str.data);
break;
}
}
}
/// Parses one operation (instruction plus arguments) into `operPtr`.
/// Called with cmp->tok_i on the instruction token; consumes tokens up to and
/// including the OperationEnd token. Unknown instruction names and unsupported
/// argument tokens are reported as errors through cmp.
static void parseOperation(Compiler* cmp, str instr_name, Operation* operPtr){
    Token tok = cmp->tokens.data[cmp->tok_i];
    const Instruction* instr = Instruction_getByName(instr_name);
    if(instr == NULL){
        setError_unexpectedInstruction(tok);
        return;
    }
    operPtr->opcode = instr->opcode;
    operPtr->args = List_Argument_alloc(8);
    // reused for every argument; each case overwrites the fields it needs
    Argument arg = (Argument){ .type = ArgumentType_Unset, .value.i = 0 };
    for(cmp->tok_i++; cmp->tok_i < cmp->tokens.len; cmp->tok_i++){
        tok = cmp->tokens.data[cmp->tok_i];
        // comments between arguments are ignored
        if(tok.type == TokenType_SingleLineComment || tok.type == TokenType_MultiLineComment)
            continue;
        if(tok.type == TokenType_OperationEnd)
            return;
        if(tok.type == TokenType_Unset){
            setError(Error_TokenUnset);
            return;
        }
        switch(tok.type){
            case TokenType_Number: {
                arg.type = ArgumentType_ConstValue;
                str text = Compiler_constructTokenStr(cmp, tok);
                // atof/atoll need a zero-terminated copy
                str zero_terminated = str_copy(text);
                if(str_seekChar(text, '.', 0) == -1)
                    arg.value.i = atoll(zero_terminated.data);
                else arg.value.f = atof(zero_terminated.data);
                free(zero_terminated.data);
                break;
            }
            case TokenType_Name: {
                str text = Compiler_constructTokenStr(cmp, tok);
                // a name is either a register or a reference to named data
                arg.value.register_code = RegisterCode_parse(text);
                if(arg.value.register_code == RegisterCode_Unset){
                    arg.type = ArgumentType_VarDataName;
                    arg.value.data_name = text;
                }
                else {
                    arg.type = ArgumentType_Register;
                }
                break;
            }
            case TokenType_NamedDataPointer:
            case TokenType_NamedDataSize: {
                str name = Compiler_constructTokenStr(cmp, tok);
                // drop the '@' / '#' prefix
                name.data++;
                name.len--;
                arg.type = (tok.type == TokenType_NamedDataPointer)
                    ? ArgumentType_ConstDataPointer
                    : ArgumentType_ConstDataSize;
                arg.value.data_name = name;
                break;
            }
            default:
                setError_unexpectedToken(tok);
                return;
        }
        List_Argument_push(&operPtr->args, arg);
    }
}
/// Turns the token list produced by the lexer into sections stored in
/// cmp->ast.sections. Must be called while cmp->state is CompilerState_Lexing.
/// Labels open a new section; instructions are added to the most recent
/// section either as data definitions (name starts with "const") or as operations.
/// @return false when an error was set, true otherwise
bool Compiler_parse(Compiler* cmp){
returnErrorIf_auto(cmp->state != CompilerState_Lexing);
cmp->state = CompilerState_Parsing;
Token tok;
// section that receives the following instructions; NULL until the first label
Section* sec = NULL;
while(cmp->tok_i < cmp->tokens.len){
tok = cmp->tokens.data[cmp->tok_i];
switch(tok.type){
case TokenType_Unset:
// returnError returns from the function, so there is no fall-through
returnError(Error_TokenUnset);
case TokenType_SingleLineComment:
case TokenType_MultiLineComment:
// skip comments
break;
case TokenType_Label:
// create new section
sec = List_Section_expand(&cmp->ast.sections, 1);
Section_init(sec, Compiler_constructTokenStr(cmp, tok));
break;
case TokenType_Instruction:
// instructions are only allowed after a label
if(sec == NULL)
returnError("no section");
str instr_name = Compiler_constructTokenStr(cmp, tok);
// data definition starts with const
if(str_startsWith(instr_name, STR("const"))){
DataDefinition* dataDefPtr = List_DataDefinition_expand(&sec->data, 1);
parseDataDefinition(cmp, instr_name, dataDefPtr);
}
else {
Operation* operPtr = List_Operation_expand(&sec->code, 1);
parseOperation(cmp, instr_name, operPtr);
}
break;
default:
setError_unexpectedToken(tok);
return false;
}
// the parse helpers report failures only through the compiler state
if(cmp->state == CompilerState_Error)
return false;
cmp->tok_i++;
}
return true;
}

24
src/compiler/Token.c Normal file
View File

@@ -0,0 +1,24 @@
#include "Token.h"
List_define(Token);
// Printable names of the token types, indexed by TokenType value.
// The order has to stay identical to the order of the TokenType enum.
static str _TokenType_str[] = {
STR("Unset"),
STR("SingleLineComment"),
STR("MultiLineComment"),
STR("Instruction"),
STR("Label"),
STR("Number"),
STR("Char"),
STR("String"),
STR("Name"),
STR("NamedDataPointer"),
STR("NamedDataSize"),
STR("OperationEnd"),
};
/// Returns the printable name of a token type, or a marker string when the
/// value has no entry in the name table.
str TokenType_toString(TokenType t){
    const size_t known_count = ARRAY_SIZE(_TokenType_str);
    if(t < known_count)
        return _TokenType_str[t];
    return STR("!!TokenType INDEX_ERROR!!");
}

31
src/compiler/Token.h Normal file
View File

@@ -0,0 +1,31 @@
#pragma once
#include "../std.h"
#include "../string/str.h"
#include "../collections/List.h"
/// Kinds of tokens produced by the lexer. Each constant is followed by an
/// example of the source text it stands for.
typedef enum TokenType {
TokenType_Unset, // initial value
TokenType_SingleLineComment, // //comment
TokenType_MultiLineComment, // /* comment */
TokenType_Instruction, // abc
TokenType_Label, // .abc:
TokenType_Number, // 0123
TokenType_Char, // 'A'
TokenType_String, // "aaaa"
TokenType_Name, // xyz
TokenType_NamedDataPointer, // @xyz
TokenType_NamedDataSize, // #xyz
TokenType_OperationEnd, // EOL or EOF or ;
} TokenType;
/// Returns the printable name of a token type.
str TokenType_toString(TokenType t);
/// A token is a typed slice of the source text: it stores where the slice
/// begins and how long it is, not a copy of the characters.
/// Length and type are bit-fields of 24 and 8 bits.
typedef struct Token {
u32 begin; // some index in Compiler->code
u32 length : 24; // length in characters (24 bits)
TokenType type : 8; // type of token (8 bits)
} Token;
List_declare(Token);
/// Builds a Token value from its type, begin index and length.
#define Token_construct(TYPE, BEGIN, LEN) ((Token){ .type = TYPE, .begin = BEGIN, .length = LEN })

View File

@@ -1,470 +0,0 @@
#include "compiler.h"
List_define(Token);
/// Converts an index in the code buffer to a 1-based line and column using the
/// recorded line lengths.
/// @param pos index in code buffer
/// @return the position, or line 0 / column 0 when `pos` is outside the code
///         or not covered by the recorded lines
CodePos Compiler_getLineAndColumn(Compiler* cmp, u32 pos){
    const CodePos not_found = CodePos_create(0, 0);
    if(pos >= cmp->code_len)
        return not_found;
    u32 line_start = 0; // index of the first character of the current line
    u32 line_i = 0;
    while(line_i < cmp->line_lengths.len){
        u32 line_end = line_start + cmp->line_lengths.data[line_i];
        if(pos < line_end)
            return CodePos_create(line_i + 1, pos - line_start + 1);
        line_start = line_end;
        line_i++;
    }
    return not_found;
}
/// Finishes the current line: appends the current column counter to
/// cmp->line_lengths as that line's length and resets the counter to 0.
static void completeLine(Compiler* cmp){
List_u32_push(&cmp->line_lengths, cmp->column);
cmp->column = 0;
}
/// Puts the compiler into CompilerState_Error and stores a heap-allocated
/// message of the form "[at LINE:COLUMN][CONTEXT] <formatted text>".
/// LINE:COLUMN is derived from cmp->pos and is 0:0 when it cannot be resolved.
/// @param context name of the reporting function
/// @param format printf-style format followed by its arguments
void _Compiler_setError(Compiler* cmp, cstr context, cstr format, ...){
    // record the unfinished line so that cmp->pos can be mapped to a line
    completeLine(cmp);
    // Clamp to the last character (happens at the end of file). With no code
    // loaded there is no last character, so do not let `code_len - 1` wrap.
    if(cmp->pos >= cmp->code_len)
        cmp->pos = cmp->code_len > 0 ? cmp->code_len - 1 : 0;
    char position_str[32];
    CodePos code_pos = Compiler_getLineAndColumn(cmp, cmp->pos);
    snprintf(position_str, sizeof(position_str), "[at %u:%u][", code_pos.line, code_pos.column);
    char* real_format = strcat_malloc(position_str, context, "] ", format);
    va_list argv;
    va_start(argv, format);
    char* NULLABLE(buf) = vsprintf_malloc(512, real_format, argv);
    va_end(argv);
    free(real_format);
    if(buf == NULL){
        // formatting failed: fall back to a fixed message
        static const char fallback[] = "SPRINTF FAILED";
        buf = malloc(sizeof(fallback));
        if(buf != NULL)
            memcpy(buf, fallback, sizeof(fallback));
    }
    cmp->state = CompilerState_Error;
    cmp->error_message = buf;
}
// Shorthand that reports an error on the variable `cmp` of the calling scope.
#define setError(FORMAT, ...) Compiler_setError(cmp, FORMAT, ##__VA_ARGS__)
// Reports an error and returns false from the enclosing function.
#define returnError(FORMAT, ...) {\
setError(FORMAT, ##__VA_ARGS__);\
return false;\
}
// returnError(...) executed only when STATEMENT is true.
#define returnErrorIf(STATEMENT, FORMAT, ...) if(STATEMENT) returnError(FORMAT, ##__VA_ARGS__)
// Same as returnErrorIf, but the stringified condition is used as the format string.
#define returnErrorIf_auto(STATEMENT) returnErrorIf(STATEMENT, #STATEMENT)
// Expands to TWO arguments (format string and character), meant to be passed to setError/returnError.
#define Error_unexpectedCharacter(C) "unexpected character '%c'", C
// Message used when the code ends in the middle of a construct.
#define Error_endOfFile "unexpected end of file"
/// Zeroes the whole Compiler struct, sets the initial state and allocates the
/// token list (4096) and the line length list (1024).
void Compiler_init(Compiler* cmp){
memset(cmp, 0, sizeof(Compiler));
cmp->state = CompilerState_Initial;
cmp->tokens = List_Token_alloc(4096);
cmp->line_lengths = List_u32_alloc(1024);
}
/// Releases the code buffer, the token list and the line length list.
/// NOTE(review): cmp->error_message is not released here - confirm who owns it.
void Compiler_free(Compiler* cmp){
free(cmp->code);
free(cmp->tokens.data);
free(cmp->line_lengths.data);
}
/// Reads a `// ...` comment and pushes it as one TokenType_SingleLineComment token.
/// Called with cmp->pos on the second '/'. The token begins one character
/// earlier and extends up to (not including) the line break or the end of code.
/// cmp->pos is left on the line break so the caller can account for it.
static void readCommentSingleLine(Compiler* cmp){
    Token comment = Token_construct(TokenType_SingleLineComment, cmp->pos - 1, 0);
    // step over the second '/'
    cmp->column++;
    cmp->pos++;
    // advance until a line break or the end of the code
    for(; cmp->pos < cmp->code_len; cmp->pos++, cmp->column++){
        char ch = cmp->code[cmp->pos];
        if(ch == '\r' || ch == '\n')
            break;
    }
    comment.length = cmp->pos - comment.begin;
    List_Token_push(&cmp->tokens, comment);
}
/// Reads a `/* ... */` comment and pushes it as one TokenType_MultiLineComment token.
/// Called with cmp->pos on the '*' that follows the opening '/'.
/// On success cmp->pos is left on the closing '/'. Reaching the end of the
/// code without a terminator sets the "unexpected end of file" error.
static void readCommentMultiLine(Compiler* cmp){
    // the token starts at the opening '/', one character before the '*'
    Token tok = Token_construct(TokenType_MultiLineComment, cmp->pos - 1, 0);
    cmp->column++;
    cmp->pos++;
    while(cmp->pos < cmp->code_len){
        char c = cmp->code[cmp->pos];
        // The '*' of the terminator must not be the '*' of the opener ("/*/" is
        // still open), so the closing '/' can sit at tok.begin + 3 at the
        // earliest, which is exactly the empty comment "/**/".
        if(cmp->pos >= tok.begin + 3 && c == '/' && cmp->code[cmp->pos - 1] == '*'){
            tok.length = cmp->pos - tok.begin + 1;
            List_Token_push(&cmp->tokens, tok);
            return;
        }
        // comments may span lines, keep line bookkeeping up to date
        if(c == '\n')
            completeLine(cmp);
        cmp->column++;
        cmp->pos++;
    }
    // end of file inside the comment
    setError(Error_endOfFile);
}
/// Dispatches on the character after a '/' and reads the matching comment kind.
/// Called with cmp->pos on the '/'. Anything other than "//" or "/*" is an error.
static void readComment(Compiler* cmp){
    // a lone '/' as the last character cannot start a comment
    if(cmp->pos + 1 >= cmp->code_len){
        setError(Error_endOfFile);
        return;
    }
    char next = cmp->code[cmp->pos + 1];
    if(next == '\r' || next == '\n'){
        // Report the stray '/' itself. cmp->pos has not been advanced yet, so
        // stepping back would name the previous character and would underflow
        // when the '/' is the very first character of the code.
        setError(Error_unexpectedCharacter(cmp->code[cmp->pos]));
        return;
    }
    // move onto the second character of the comment opener
    cmp->pos++;
    cmp->column++;
    if(next == '/'){
        readCommentSingleLine(cmp);
    }
    else if(next == '*'){
        readCommentMultiLine(cmp);
    }
    else {
        setError(Error_unexpectedCharacter(next));
    }
}
/// Reads a label of the form `.name:` and pushes the name (without the dot)
/// as a TokenType_Label token. Called with cmp->pos on the '.'.
/// The name ends at ':', a line break or the end of the code and may contain
/// letters, digits, '_' and '.'. An empty name is reported as an error.
static void readLabel(Compiler* cmp){
char c; // '.'
cmp->pos++;
cmp->column++;
Token tok = Token_construct(TokenType_Label, cmp->pos, 0);
while(cmp->pos < cmp->code_len){
c = cmp->code[cmp->pos];
// end of line
if(c == ':' || c == '\r' || c == '\n'){
tok.length = cmp->pos - tok.begin;
if(tok.length > 0)
List_Token_push(&cmp->tokens, tok);
// empty name: step back so that the reported character is the '.'
else setError(Error_unexpectedCharacter(cmp->code[--cmp->pos]));
// cmp->line will be increased in lex()
return;
}
if(!isAlphabeticalLower(c) && !isAlphabeticalUpper(c) && !isDigit(c) &&
c != '_' && c != '.'){
setError(Error_unexpectedCharacter(c));
return;
}
cmp->column++;
cmp->pos++;
}
// end of file
tok.length = cmp->pos - tok.begin;
if(tok.length > 0)
List_Token_push(&cmp->tokens, tok);
else setError(Error_endOfFile);
}
/// Splits the rest of an operation into TokenType_Argument tokens separated by
/// spaces or tabs. Called with cmp->pos on the blank after the instruction name.
/// Stops on a line break or ';' (cmp->pos is left on that character) or at the
/// end of the code. Empty arguments are not pushed.
static void readArguments(Compiler* cmp){
char c; // space
cmp->pos++;
cmp->column++;
Token tok = Token_construct(TokenType_Argument, cmp->pos, 0);
while(cmp->pos < cmp->code_len){
c = cmp->code[cmp->pos];
// end of line
if(c == '\r' || c == '\n' || c == ';'){
tok.length = cmp->pos - tok.begin;
if(tok.length > 0)
List_Token_push(&cmp->tokens, tok);
// cmp->line will be increased in lex()
return;
}
// new argument begins
if(c == ' ' || c == '\t'){
tok.length = cmp->pos - tok.begin;
if(tok.length > 0)
List_Token_push(&cmp->tokens, tok);
// the next argument starts after this blank
tok.begin = cmp->pos + 1;
}
cmp->column++;
cmp->pos++;
}
// end of file
tok.length = cmp->pos - tok.begin;
if(tok.length > 0)
List_Token_push(&cmp->tokens, tok);
}
/// Reads an instruction name (letters and digits) and pushes it as a
/// TokenType_Instruction token; when a blank follows, the arguments are read too.
/// Called with cmp->pos on the first letter of the name.
static void readInstruction(Compiler* cmp){
Token tok = Token_construct(TokenType_Instruction, cmp->pos, 0);
cmp->pos++;
cmp->column++;
while(cmp->pos < cmp->code_len){
char c = cmp->code[cmp->pos];
// end of line
if(c == '\r' || c == '\n' || c == ';'){
tok.length = cmp->pos - tok.begin;
List_Token_push(&cmp->tokens, tok);
// cmp->line will be increased in lex()
return;
}
// arguments begin
if(c == ' ' || c == '\t'){
tok.length = cmp->pos - tok.begin;
List_Token_push(&cmp->tokens, tok);
readArguments(cmp);
return;
}
if(!isAlphabeticalLower(c) && !isAlphabeticalUpper(c) && !isDigit(c)){
setError(Error_unexpectedCharacter(c));
return;
}
cmp->column++;
cmp->pos++;
}
// end of file
tok.length = cmp->pos - tok.begin;
List_Token_push(&cmp->tokens, tok);
}
/// Reads a single-quoted literal and pushes it, quotes included, as a
/// TokenType_Char token. Called with cmp->pos on the opening quote; on success
/// cmp->pos is left on the closing quote. A line break or the end of the code
/// before the closing quote is an error. Backslash escapes are not recognised here.
static void readChar(Compiler* cmp){
Token tok = Token_construct(TokenType_Char, cmp->pos, 0);
cmp->pos++;
cmp->column++;
while(cmp->pos < cmp->code_len){
char c = cmp->code[cmp->pos];
// end of line
if(c == '\r' || c == '\n'){
// step back so that the reported character is the one before the line break
setError(Error_unexpectedCharacter(cmp->code[--cmp->pos]));
return;
}
if(c == '\''){
tok.length = cmp->pos - tok.begin + 1;
List_Token_push(&cmp->tokens, tok);
return;
}
cmp->column++;
cmp->pos++;
}
// end of file
setError(Error_endOfFile);
}
/// Reads a double-quoted literal and pushes it, quotes included, as a
/// TokenType_String token. Called with cmp->pos on the opening quote; on success
/// cmp->pos is left on the closing quote. A line break or the end of the code
/// before the closing quote is an error. Backslash escapes are not recognised here.
static void readString(Compiler* cmp){
Token tok = Token_construct(TokenType_String, cmp->pos, 0);
cmp->pos++;
cmp->column++;
while(cmp->pos < cmp->code_len){
char c = cmp->code[cmp->pos];
// end of line
if(c == '\r' || c == '\n'){
// step back so that the reported character is the one before the line break
setError(Error_unexpectedCharacter(cmp->code[--cmp->pos]));
return;
}
if(c == '"'){
tok.length = cmp->pos - tok.begin + 1;
List_Token_push(&cmp->tokens, tok);
return;
}
cmp->column++;
cmp->pos++;
}
// end of file
setError(Error_endOfFile);
}
/// Reads everything up to the next blank, line break, ',' or ';' (or the end of
/// the code) and pushes it as a TokenType_Number token.
/// Called with cmp->pos on the first digit. The characters are not validated here.
static void readNumber(Compiler* cmp){
Token tok = Token_construct(TokenType_Number, cmp->pos, 0);
cmp->pos++;
cmp->column++;
while(cmp->pos < cmp->code_len){
char c = cmp->code[cmp->pos];
// any of these characters ends the number
if(c == '\r' || c == '\n' || c == ' ' || c == '\t' || c == ',' || c == ';'){
tok.length = cmp->pos - tok.begin;
List_Token_push(&cmp->tokens, tok);
return;
}
cmp->column++;
cmp->pos++;
}
// end of file
tok.length = cmp->pos - tok.begin;
List_Token_push(&cmp->tokens, tok);
}
/// Splits cmp->code into tokens (cmp->tokens) and records line lengths
/// (cmp->line_lengths). Must be called in CompilerState_Initial.
/// @return false when an error was set, true otherwise
static bool lex(Compiler* cmp){
returnErrorIf_auto(cmp->state != CompilerState_Initial);
cmp->state = CompilerState_Lexing;
cmp->column = 1;
while(cmp->pos < cmp->code_len){
char c = cmp->code[cmp->pos];
switch(c){
// skip blank characters
case ' ': case '\t': case '\r': case '\n':
break;
// try read comment
case '/':
readComment(cmp);
break;
// try read label
case '.':
readLabel(cmp);
break;
case '"':
readString(cmp);
break;
case '\'':
readChar(cmp);
break;
default:
// try read instruction
if(isAlphabeticalLower(c) || isAlphabeticalUpper(c))
readInstruction(cmp);
else if(isDigit(c))
readNumber(cmp);
else returnError(Error_unexpectedCharacter(c));
break;
}
if(cmp->state == CompilerState_Error)
return false;
// the readers stop on their terminating character; re-read it to detect a line break
c = cmp->code[cmp->pos];
if(c == '\n')
completeLine(cmp);
cmp->column++;
cmp->pos++;
}
// the last line has no terminating '\n' to trigger completeLine
completeLine(cmp);
return true;
}
/// Parsing stage placeholder: only checks that lexing came first and advances
/// the state to CompilerState_Parsing.
static bool parse(Compiler* cmp){
returnErrorIf_auto(cmp->state != CompilerState_Lexing);
cmp->state = CompilerState_Parsing;
return true;
}
/// Code generation stage placeholder: only checks that parsing came first and
/// advances the state to CompilerState_Compiling. Nothing is written to `f` yet.
static bool compile(Compiler* cmp, FILE* f){
returnErrorIf_auto(cmp->state != CompilerState_Parsing);
cmp->state = CompilerState_Compiling;
return true;
}
/// Compiles one assembly source file into a binary file.
/// Reads the whole source into memory (owned by `cmp` afterwards and released
/// by Compiler_free), then runs the lex, parse and compile stages.
/// @param debug when true, prints the source, the line lengths and the tokens
/// @return true on success; on failure the error is stored in `cmp`
bool Compiler_compileTasm(Compiler* cmp, cstr source_file_name, cstr out_file_name, bool debug){
    FILE* f = fopen(source_file_name, "rb");
    if(f == NULL)
        returnError("ERROR: can't open file '%s'", source_file_name);

    List_u8 buf = List_u8_alloc(64 * 1024);
    int ret;
    while((ret = fgetc(f)) != EOF) {
        List_u8_push(&buf, ret);
    }
    // query the error flag before closing; the stream is closed exactly once
    bool read_failed = ferror(f) != 0;
    fclose(f);
    if(read_failed){
        free(buf.data);
        returnError("can't read file '%s'", source_file_name);
    }
    if(buf.len == 0){
        free(buf.data);
        returnError("source file is empty");
    }

    // The terminating zero is not part of the code length. Append it BEFORE
    // taking the data pointer: pushing may reallocate the buffer.
    cmp->code_len = buf.len;
    List_u8_push(&buf, 0);
    cmp->code = (char*)buf.data;

    f = fopen(out_file_name, "wb");
    if(f == NULL){
        // the buffer now belongs to cmp and is released by Compiler_free
        returnError("ERROR: can't open file '%s'", out_file_name);
    }

    if(debug){
        printf("----------------------------------[%s]---------------------------------\n", source_file_name);
        fputs(cmp->code, stdout);
        fputc('\n', stdout);
    }

    bool success = lex(cmp);

    if(debug){
        printf("------------------------------------[lines]-----------------------------------\n");
        for(u32 i = 0; i < cmp->line_lengths.len; i++){
            printf("[%u] length: %u\n", i+1, cmp->line_lengths.data[i]);
        }
        printf("------------------------------------[tokens]-----------------------------------\n");
        for(u32 i = 0; i < cmp->tokens.len; i++){
            Token t = cmp->tokens.data[i];
            CodePos pos = Compiler_getLineAndColumn(cmp, t.begin);
            // print the slice in place: no temporary buffer that a long token could overflow
            printf("[l:%3u, c:%3u] %s '%.*s'\n",
                pos.line, pos.column,
                TokenType_toString(t.type), (int)t.length, cmp->code + t.begin);
        }
    }

    if(!success){
        fclose(f);
        return false;
    }
    success = parse(cmp);
    if(!success){
        fclose(f);
        return false;
    }
    success = compile(cmp, f);
    fclose(f);
    if(success){
        cmp->state = CompilerState_Success;
    }
    return success;
}

View File

@@ -1,19 +0,0 @@
#include "token.h"
// Printable names of the token types, indexed by TokenType value.
// The order has to stay identical to the order of the TokenType enum.
static cstr _TokenType_str[] = {
"Unset",
"SingleLineComment",
"MultiLineComment",
"Label",
"Instruction",
"Argument",
"Number",
"Char",
"String",
};
/// Returns the printable name of a token type, or "!!INDEX_ERROR!!" when the
/// value has no entry in the name table.
cstr TokenType_toString(TokenType t){
if(t >= sizeof(_TokenType_str) / sizeof(cstr))
return "!!INDEX_ERROR!!";
return _TokenType_str[t];
}

View File

@@ -1,24 +0,0 @@
#pragma once
#include "../std.h"
/// Kinds of tokens produced by the lexer.
typedef enum TokenType {
TokenType_Unset,
TokenType_SingleLineComment,
TokenType_MultiLineComment,
TokenType_Label,
TokenType_Instruction,
TokenType_Argument,
TokenType_Number,
TokenType_Char,
TokenType_String,
} TokenType;
/// Returns the printable name of a token type.
cstr TokenType_toString(TokenType t);
/// A token is a typed slice of the source text: it stores where the slice
/// begins and how long it is, not a copy of the characters.
typedef struct Token {
u32 begin; // some index in Compiler->code
u32 length : 24; // length in characters (24 bits)
TokenType type : 8; // type of token (8 bits)
} Token;
/// Builds a Token value. Note that the third parameter, although named END,
/// initializes the `length` field.
#define Token_construct(TYPE, BEGIN, END) ((Token){ .type = TYPE, .begin = BEGIN, .length = END })

View File

@@ -43,7 +43,7 @@ char* NULLABLE(sprintf_malloc)(size_t buffer_size, cstr format, ...){
char* NULLABLE(vsprintf_malloc)(size_t buffer_size, cstr format, va_list argv){ char* NULLABLE(vsprintf_malloc)(size_t buffer_size, cstr format, va_list argv){
char* buf = malloc(buffer_size); char* buf = malloc(buffer_size);
int r = vsprintf_s(buf, buffer_size, format, argv); int r = vsprintf(buf, format, argv);
if(r < 0){ if(r < 0){
free(buf); free(buf);
return NULL; return NULL;

View File

@@ -4,8 +4,9 @@
i32 PUSH_impl(VM* vm){ i32 PUSH_impl(VM* vm){
u8 dst_register_i = 0; u8 dst_register_i = 0;
readRegisterVar(dst_register_i); readRegisterVar(dst_register_i);
u8 value_size = 0; /*u8 value_size = 0;
readValueSizeVar(value_size); readValueSizeVar(value_size);*/
u8 value_size = 4;\
vm->registers[dst_register_i].u32v = 0; vm->registers[dst_register_i].u32v = 0;
if(!VM_dataRead(vm, &vm->registers[dst_register_i].u32v, vm->current_pos, value_size)) if(!VM_dataRead(vm, &vm->registers[dst_register_i].u32v, vm->current_pos, value_size))

View File

@@ -1,5 +1,6 @@
#pragma once #pragma once
#include "../instructions.h" #include "../instructions.h"
#include "../registers.h"
#define readVar(VAR) {\ #define readVar(VAR) {\
if(!VM_dataRead(vm, &VAR, vm->current_pos, sizeof(VAR))) \ if(!VM_dataRead(vm, &VAR, vm->current_pos, sizeof(VAR))) \
@@ -8,7 +9,7 @@
} }
#define validateRegisterIndex(VAR) {\ #define validateRegisterIndex(VAR) {\
if(VAR > sizeof(vm->registers)){\ if(VAR> sizeof(vm->registers)){\
VM_setError(vm, "invalid register index (%x)", VAR);\ VM_setError(vm, "invalid register index (%x)", VAR);\
return -1;\ return -1;\
}\ }\
@@ -16,9 +17,11 @@
#define readRegisterVar(VAR) {\ #define readRegisterVar(VAR) {\
readVar(VAR);\ readVar(VAR);\
VAR -= 1;\
validateRegisterIndex(VAR);\ validateRegisterIndex(VAR);\
} }
/*
#define validateValueSize(VAR) {\ #define validateValueSize(VAR) {\
if(VAR < 1 || VAR > 4){\ if(VAR < 1 || VAR > 4){\
VM_setError(vm, "invalid value_size (%x)", VAR);\ VM_setError(vm, "invalid value_size (%x)", VAR);\
@@ -30,3 +33,4 @@
readVar(VAR);\ readVar(VAR);\
validateValueSize(VAR);\ validateValueSize(VAR);\
} }
*/

View File

@@ -4,8 +4,9 @@
u8 dst_register_i = 0, src_register_i = 0;\ u8 dst_register_i = 0, src_register_i = 0;\
readRegisterVar(dst_register_i);\ readRegisterVar(dst_register_i);\
readRegisterVar(src_register_i);\ readRegisterVar(src_register_i);\
u8 value_size = 0;\ /*u8 value_size = 0;\
readValueSizeVar(value_size);\ readValueSizeVar(value_size);*/\
u8 value_size = 4;\
\ \
switch(value_size){\ switch(value_size){\
case 1: \ case 1: \

View File

@@ -1,6 +1,21 @@
#include "instructions.h" #include "instructions.h"
#include "../collections/HashMap.h"
const Instruction instructions[] = { i32 NOP_impl(VM* vm);
i32 PUSH_impl(VM* vm);
i32 MOV_impl(VM* vm);
i32 ADD_impl(VM* vm);
i32 SUB_impl(VM* vm);
i32 MUL_impl(VM* vm);
i32 DIV_impl(VM* vm);
i32 MOD_impl(VM* vm);
i32 SYS_impl(VM* vm);
i32 EXIT_impl(VM* vm);
i32 JMP_impl(VM* vm);
i32 CALL_impl(VM* vm);
Array_declare(Instruction);
static const Array_Instruction instructions_array = ARRAY(Instruction, {
Instruction_construct(NOP), Instruction_construct(NOP),
Instruction_construct(PUSH), Instruction_construct(PUSH),
Instruction_construct(MOV), Instruction_construct(MOV),
@@ -13,12 +28,38 @@ const Instruction instructions[] = {
Instruction_construct(EXIT), Instruction_construct(EXIT),
// Instruction_construct(JMP), // Instruction_construct(JMP),
// Instruction_construct(CALL), // Instruction_construct(CALL),
}; });
const size_t instructions_count = sizeof(instructions)/sizeof(instructions[0]);
const Instruction* NULLABLE(Instruction_getFromOpcode)(u8 opcode){ const Instruction* Instruction_getByOpcode(Opcode opcode){
if(opcode >= instructions_count) if(opcode >= instructions_array.len)
return NULL; return NULL;
return instructions + opcode; return instructions_array.data + opcode;
}
HashMap_declare(Instruction);
HashMap_define(Instruction, HashMap_DESTROY_VALUE_FUNC_NULL);
static HashMap_Instruction* instructions_map = NULL;
const Instruction* Instruction_getByName(str name){
if(instructions_map == NULL){
instructions_map = malloc(sizeof(HashMap_Instruction));
HashMap_Instruction_alloc(instructions_map);
for(u32 i = 0; i < instructions_array.len; i++){
HashMap_Instruction_tryPush(instructions_map, instructions_array.data[i].name, instructions_array.data[i]);
}
}
str name_upper = str_toUpper(name);
Instruction* iptr = HashMap_Instruction_tryGetPtr(instructions_map, name_upper);
free(name_upper.data);
return iptr;
}
void Instruction_freeSearchStructs(){
if(instructions_map != NULL){
HashMap_Instruction_free(instructions_map);
free(instructions_map);
}
} }

View File

@@ -5,30 +5,34 @@
///@returns number of bytes read ///@returns number of bytes read
typedef i32 (*InstructionImplFunc_t)(VM* vm); typedef i32 (*InstructionImplFunc_t)(VM* vm);
typedef enum __attribute__((__packed__)) Opcode {
Opcode_NOP,
Opcode_PUSH,
Opcode_MOV,
Opcode_ADD,
Opcode_SUB,
Opcode_MUL,
Opcode_DIV,
Opcode_MOD,
Opcode_SYS,
Opcode_EXIT,
} Opcode;
typedef struct Instruction { typedef struct Instruction {
cstr name; str name;
InstructionImplFunc_t implementation; InstructionImplFunc_t implementation;
Opcode opcode;
} Instruction; } Instruction;
#define Instruction_construct(NAME) {\ #define Instruction_construct(NAME) {\
.name = #NAME, \ .name = STR(#NAME), \
.implementation = NAME##_impl \ .implementation = NAME##_impl, \
.opcode = Opcode_##NAME\
} }
/// @brief get instruction info from table /// @brief get instruction info from table
/// @param opcode any byte /// @param opcode any byte
/// @return ptr to struct or NULL /// @return ptr to struct or NULL
const Instruction* NULLABLE(Instruction_getFromOpcode)(u8 opcode); const Instruction* NULLABLE(Instruction_getByOpcode)(Opcode opcode);
const Instruction* NULLABLE(Instruction_getByName)(str name);
i32 NOP_impl(VM* vm); void Instruction_freeSearchStructs();
i32 PUSH_impl(VM* vm);
i32 MOV_impl(VM* vm);
i32 ADD_impl(VM* vm);
i32 SUB_impl(VM* vm);
i32 MUL_impl(VM* vm);
i32 DIV_impl(VM* vm);
i32 MOD_impl(VM* vm);
i32 SYS_impl(VM* vm);
i32 EXIT_impl(VM* vm);
i32 JMP_impl(VM* vm);
i32 CALL_impl(VM* vm);

View File

@@ -0,0 +1,13 @@
#include "registers.h"
/// Maps a register name ("ax", "bx", "cx", "dx") to its code.
/// @return the matching code, or RegisterCode_Unset for any other text
RegisterCode RegisterCode_parse(str r){
    const struct {
        str name;
        RegisterCode code;
    } known[] = {
        { STR("ax"), RegisterCode_ax },
        { STR("bx"), RegisterCode_bx },
        { STR("cx"), RegisterCode_cx },
        { STR("dx"), RegisterCode_dx },
    };
    for(size_t k = 0; k < sizeof(known) / sizeof(known[0]); k++){
        if(str_equals(r, known[k].name))
            return known[k].code;
    }
    return RegisterCode_Unset;
}

View File

@@ -0,0 +1,13 @@
#pragma once
#include "../std.h"
#include "../string/str.h"
/// Codes of the registers. RegisterCode_Unset (0) is what RegisterCode_parse
/// returns for text that is not a register name.
typedef enum RegisterCode {
RegisterCode_Unset,
RegisterCode_ax,
RegisterCode_bx,
RegisterCode_cx,
RegisterCode_dx
} RegisterCode;
/// Maps a register name to its code, RegisterCode_Unset when the name is unknown.
RegisterCode RegisterCode_parse(str register_name);

View File

@@ -1,11 +1,11 @@
#include "VM/VM.h" #include "VM/VM.h"
#include "instructions/instructions.h" #include "instructions/instructions.h"
#include "collections/List.h" #include "collections/List.h"
#include "compiler/compiler.h" #include "compiler/Compiler.h"
#define arg_is(STR) (strcmp(argv[argi], STR) == 0) #define arg_is(STR) (strcmp(argv[argi], STR) == 0)
i32 compileSources(cstr source_file, cstr out_file); i32 compileSources(cstr source_file, cstr out_file, bool debug_log);
i32 bootFromImage(cstr image_file); i32 bootFromImage(cstr image_file);
i32 main(const i32 argc, cstr* argv){ i32 main(const i32 argc, cstr* argv){
@@ -21,6 +21,8 @@ i32 main(const i32 argc, cstr* argv){
cstr NULLABLE(out_file) = NULL; cstr NULLABLE(out_file) = NULL;
cstr NULLABLE(source_file) = NULL; cstr NULLABLE(source_file) = NULL;
bool debug_log = false;
for(i32 argi = 1; argi < argc; argi++){ for(i32 argi = 1; argi < argc; argi++){
if(arg_is("-h") || arg_is("--help")){ if(arg_is("-h") || arg_is("--help")){
printf( printf(
@@ -28,14 +30,15 @@ i32 main(const i32 argc, cstr* argv){
"-op, --opcodes Show list of all instructions.\n" "-op, --opcodes Show list of all instructions.\n"
"-i, --image [FILE] Boot VM using image file.\n" "-i, --image [FILE] Boot VM using image file.\n"
"-c, --compile [SOURCE_FILE] [OUT_FILE] Compile assembly source files to machine code.\n" "-c, --compile [SOURCE_FILE] [OUT_FILE] Compile assembly source files to machine code.\n"
"-d, --debug Enable debug log.\n"
); );
return 0; return 0;
} }
else if(arg_is("-op") || arg_is("--opcodes")){ else if(arg_is("-op") || arg_is("--opcodes")){
for(u8 opcode = 0; opcode < 255; opcode++){ for(u8 opcode = 0; opcode < 255; opcode++){
const Instruction* instr = Instruction_getFromOpcode(opcode); const Instruction* instr = Instruction_getByOpcode(opcode);
if(instr != NULL){ if(instr != NULL){
printf("%02X %s\n", opcode, instr->name); printf("%02X %s\n", opcode, instr->name.data);
} }
} }
return 0; return 0;
@@ -72,6 +75,9 @@ i32 main(const i32 argc, cstr* argv){
} }
out_file = argv[argi]; out_file = argv[argi];
} }
else if(arg_is("-d") || arg_is("--debug")){
debug_log = true;
}
else { else {
printfe("ERROR: unknown argument '%s'\n", argv[argi]); printfe("ERROR: unknown argument '%s'\n", argv[argi]);
return 1; return 1;
@@ -80,12 +86,14 @@ i32 main(const i32 argc, cstr* argv){
i32 exit_code = 0; i32 exit_code = 0;
if(compile){ if(compile){
exit_code = compileSources(source_file, out_file); exit_code = compileSources(source_file, out_file, debug_log);
} }
if(exit_code == 0 && boot){ if(exit_code == 0 && boot){
exit_code = bootFromImage(image_file); exit_code = bootFromImage(image_file);
} }
// frees global variables to suppress valgrind memory leak errors
Instruction_freeSearchStructs();
return exit_code; return exit_code;
} }
@@ -131,19 +139,20 @@ i32 bootFromImage(cstr image_file){
return exit_code; return exit_code;
} }
i32 compileSources(cstr source_file, cstr out_file){ i32 compileSources(cstr source_file, cstr out_file, bool debug_log){
Compiler cmp; Compiler cmp;
Compiler_init(&cmp); Compiler_init(&cmp);
bool success = Compiler_compileTasm(&cmp, source_file, out_file, true); bool success = Compiler_compile(&cmp, source_file, out_file, debug_log);
Compiler_free(&cmp);
if(!success){ if(!success){
if(cmp.error_message){ if(cmp.error_message){
printfe("COMPILER ERROR: %s\n", cmp.error_message); printfe("COMPILER ERROR: %s\n", cmp.error_message);
free(cmp.error_message); free(cmp.error_message);
} }
else printfe("COMPILER ERROR: unknown (error_message is null)\n"); else printfe("COMPILER ERROR: unknown (error_message is null)\n");
Compiler_free(&cmp);
return 111; return 111;
} }
Compiler_free(&cmp);
return 0; return 0;
} }

View File

@@ -26,6 +26,15 @@ typedef u8 bool;
typedef const char* cstr; typedef const char* cstr;
/// IFWIN(YES, NO) expands to YES when _WIN64 or _WIN32 is defined, otherwise to NO.
#if defined(_WIN64) || defined(_WIN32)
    #define IFWIN(YES, NO) YES
#else
    #define IFWIN(YES, NO) NO
#endif

/// Number of elements in array A.
/// Only meaningful for real arrays: a pointer argument yields sizeof(pointer)/sizeof(element).
/// The argument is parenthesized before subscripting so that any expression can be passed.
#define ARRAY_SIZE(A) (sizeof(A)/sizeof((A)[0]))
/// Rounds _SIZE up to a multiple of _ALIGN using a bit mask,
/// so the result is only correct when _ALIGN is a power of two.
#define ALIGN_TO(_SIZE,_ALIGN) (((_SIZE) + ((_ALIGN) - 1)) & ~((_ALIGN) - 1))
#define __count_args( \ #define __count_args( \
a0, a1, a2, a3, a4, a5, a6, a7 , a8, a9, a10,a11,a12,a13,a14,a15, \ a0, a1, a2, a3, a4, a5, a6, a7 , a8, a9, a10,a11,a12,a13,a14,a15, \
a16,a17,a18,a19,a20,a21,a22,a23, a24,a25,a26,a27,a28,a29,a30,a31, \ a16,a17,a18,a19,a20,a21,a22,a23, a24,a25,a26,a27,a28,a29,a30,a31, \

View File

@@ -0,0 +1,53 @@
#include "StringBuilder.h"
/// Releases the memory held by the builder's buffer and resets the buffer
/// to an empty list constructed from (NULL, 0, 0).
/// @param b builder to release
void StringBuilder_free(StringBuilder* b){
    // detach the allocation first, then release it
    void* released = b->buffer.data;
    b->buffer = List_u8_construct(NULL, 0, 0);
    free(released);
}
/// Pushes a '\0' byte to the buffer and returns a zero-terminated view of the content.
/// The returned str points into the builder's buffer (no copy is made) and its
/// length is the buffer length after the push minus one.
/// NOTE(review): the pushed '\0' appears to stay in the buffer, so bytes appended
/// afterwards would follow it - confirm against List_u8_push.
str StringBuilder_getStr(StringBuilder* b){
    List_u8_push(&b->buffer, '\0');
    // data is read after the push
    return str_construct((char*)b->buffer.data, b->buffer.len - 1, true);
}
/// Drops the last `count` bytes of the buffer by shrinking its length.
/// When `count` is greater than or equal to the current length the buffer
/// becomes empty (so passing (u32)-1 clears it).
void StringBuilder_removeFromEnd(StringBuilder* b, u32 count){
    if(count >= b->buffer.len){
        b->buffer.len = 0;
        return;
    }
    b->buffer.len -= count;
}
/// Appends a single character to the end of the builder's buffer.
void StringBuilder_append_char(StringBuilder* b, char c){
    List_u8_push(&b->buffer, (u8)c);
}
/// Appends the s.len bytes of `s` to the end of the builder's buffer.
void StringBuilder_append_string(StringBuilder* b, str s){
    u8* bytes = (u8*)s.data;
    List_u8_pushMany(&b->buffer, bytes, s.len);
}
/// Appends a zero-terminated C string (without its terminator) to the builder.
/// @param s must not be NULL: its length is measured with strlen
void StringBuilder_append_cstr(StringBuilder* b, char* s){
    str wrapped = str_construct(s, strlen(s), true);
    StringBuilder_append_string(b, wrapped);
}
/// Appends the decimal text of a signed integer to the builder.
/// The value is cast to long long and printed with "%lli", so the format
/// matches the argument on every platform regardless of how wide `long` is.
void StringBuilder_append_i64(StringBuilder* b, i64 n){
    // a 64-bit value needs at most 20 characters (sign + 19 digits) plus '\0'
    char buf[32];
    int written = snprintf(buf, sizeof(buf), "%lli", (long long)n);
    if(written < 0)
        return; // formatting failed, buf content is unspecified
    StringBuilder_append_cstr(b, buf);
}
/// Appends the decimal text of an unsigned integer to the builder.
/// The value is cast to unsigned long long and printed with "%llu", so the format
/// matches the argument on every platform regardless of how wide `long` is.
void StringBuilder_append_u64(StringBuilder* b, u64 n){
    // a 64-bit value needs at most 20 digits plus '\0'
    char buf[32];
    int written = snprintf(buf, sizeof(buf), "%llu", (unsigned long long)n);
    if(written < 0)
        return; // formatting failed, buf content is unspecified
    StringBuilder_append_cstr(b, buf);
}
/// Appends the fixed-notation text ("%lf", 6 decimals) of a floating point number.
void StringBuilder_append_f64(StringBuilder* b, f64 n){
    // Fixed notation prints every integer digit: the largest double needs
    // 309 digits + sign + '.' + 6 decimals + '\0', far more than 32 bytes.
    char buf[512];
    // snprintf never writes past the buffer and always terminates it
    int written = snprintf(buf, sizeof(buf), "%lf", n);
    if(written < 0)
        return; // formatting failed, buf content is unspecified
    StringBuilder_append_cstr(b, buf);
}

View File

@@ -0,0 +1,25 @@
#pragma once
#include "../collections/List.h"
#include "str.h"
/// Accumulates string content in a List_u8 byte buffer.
/// Create with StringBuilder_alloc, release with StringBuilder_free.
typedef struct StringBuilder {
// bytes appended so far
List_u8 buffer;
} StringBuilder;
/// Creates a StringBuilder whose buffer comes from List_u8_alloc(initial_size).
/// @param initial_size value forwarded to List_u8_alloc
static inline StringBuilder StringBuilder_alloc(u32 initial_size) {
    StringBuilder builder;
    builder.buffer = List_u8_alloc(initial_size);
    return builder;
}
/// frees the buffer memory and resets the builder to an empty buffer
void StringBuilder_free(StringBuilder* b);
/// removes up to <count> bytes from the end of the buffer
/// @param count set to -1 to clear StringBuilder
void StringBuilder_removeFromEnd(StringBuilder* b, u32 count);
/// appends one character
void StringBuilder_append_char(StringBuilder* b, char c);
/// appends a zero-terminated C string, <s> must not be NULL
void StringBuilder_append_cstr(StringBuilder* b, char* s);
/// appends s.len bytes of <s>
void StringBuilder_append_string(StringBuilder* b, str s);
/// appends decimal representation of a signed integer
void StringBuilder_append_i64(StringBuilder* b, i64 a);
/// appends decimal representation of an unsigned integer
void StringBuilder_append_u64(StringBuilder* b, u64 a);
/// appends a floating point number formatted with "%lf"
void StringBuilder_append_f64(StringBuilder* b, f64 a);
// adds '\0' to the buffer and returns pointer to buffer content
// (the returned str refers to the builder's own memory, it is not a copy)
str StringBuilder_getStr(StringBuilder* b);

125
src/string/str.c Normal file
View File

@@ -0,0 +1,125 @@
#include "str.h"
/// Creates a heap-allocated, zero-terminated copy of `src`.
/// @param src string to copy; when src.data is NULL it is returned unchanged
/// @return a new string whose .data must be released with free();
///         an empty non-NULL input also produces a fresh allocation, so any
///         non-NULL result is always owned by the caller.
///         Returns a NULL/0 string if the allocation fails.
str str_copy(str src){
    if(src.data == NULL)
        return src;

    // size_t arithmetic so that len + 1 cannot wrap around
    char* copy = (char*)malloc((size_t)src.len + 1);
    if(copy == NULL)
        return str_construct(NULL, 0, false);

    memcpy(copy, src.data, src.len);
    copy[src.len] = '\0';
    return str_construct(copy, src.len, true);
}
/// Checks two strings for byte-wise equality.
/// Strings of different length are never equal; when both lengths are 0
/// the data pointers are not dereferenced.
/// @return true when the lengths match and every byte is the same
bool str_equals(str s0, str s1){
    if(s0.len != s1.len)
        return false;

    // advance while the bytes agree, equal only if the end was reached
    u32 matched = 0;
    while(matched < s0.len && s0.data[matched] == s1.data[matched])
        matched++;
    return matched == s0.len;
}
/// Creates a heap-allocated string holding the bytes of `s` in reverse order.
/// @param s string to reverse; when s.data is NULL it is returned unchanged
/// @return a new zero-terminated string whose .data must be released with free().
///         The terminator is always written, so isZeroTerminated is true for
///         every allocated result. Returns a NULL/0 string if the allocation fails.
str str_reverse(str s){
    if(s.data == NULL)
        return s;

    // one extra byte for the terminator, size_t arithmetic so len + 1 cannot wrap
    char* reversed = (char*)malloc((size_t)s.len + 1);
    if(reversed == NULL)
        return str_construct(NULL, 0, false);

    for(u32 i = 0; i < s.len; i++)
        reversed[i] = s.data[s.len - 1 - i];
    reversed[s.len] = '\0';
    return str_construct(reversed, s.len, true);
}
/// Finds the first occurrence of `fragment` in `src`, scanning forward.
/// @param startIndex index in src where the search begins
/// @return index of the first byte of the match, or -1 when there is no match,
///         when either string is empty, or when fragment is longer than src
i32 str_seek(str src, str fragment, u32 startIndex){
    // the length check also covers an empty src and keeps the
    // subtraction below from wrapping around
    if(fragment.len == 0 || fragment.len > src.len)
        return -1;

    // last position where the whole fragment still fits into src
    u32 last_start = src.len - fragment.len;
    for(u32 i = startIndex; i <= last_start; i++){
        if(memcmp(src.data + i, fragment.data, fragment.len) == 0)
            return (i32)i;
    }
    return -1;
}
/// Finds the last occurrence of `fragment` in `src`, scanning backward.
/// @param startIndex index of the last byte of src that a match may cover;
///        values past the end are clamped to src.len - 1
/// @return index of the first byte of the match, or -1 when there is no match,
///         when either string is empty, or when fragment does not fit
i32 str_seekReverse(str src, str fragment, u32 startIndex){
    if(fragment.len == 0 || fragment.len > src.len)
        return -1;
    if(startIndex > src.len - 1)
        startIndex = src.len - 1;
    // fragment does not fit into src[0..startIndex]
    if(startIndex < fragment.len - 1)
        return -1;

    // `start` is the candidate position of the fragment's first byte.
    // The "start-- > 0" form visits start == 0 and then stops, so the
    // unsigned counter never wraps around inside the loop body.
    for(u32 start = startIndex - (fragment.len - 1) + 1; start-- > 0; ){
        if(memcmp(src.data + start, fragment.data, fragment.len) == 0)
            return (i32)start;
    }
    return -1;
}
/// Finds the first occurrence of character `c` in `src`, scanning forward.
/// @param startIndex index in src where the search begins
/// @return index of the match, or -1 when `c` is not found
i32 str_seekChar(str src, char c, u32 startIndex){
    u32 pos = startIndex;
    while(pos < src.len){
        if(src.data[pos] == c)
            return pos;
        pos++;
    }
    return -1;
}
/// Finds the last occurrence of character `c` in `src`, scanning backward.
/// @param startIndex index where the search begins;
///        values past the end are clamped to src.len - 1
/// @return index of the match, or -1 when `c` is not found or src is empty
i32 str_seekCharReverse(str src, char c, u32 startIndex){
    // without this check src.len - 1 wraps around and the clamp below is skipped
    if(src.len == 0)
        return -1;
    if(startIndex > src.len - 1)
        startIndex = src.len - 1;

    // "i-- > 0" visits index 0 and then stops without wrapping inside the body
    for(u32 i = startIndex + 1; i-- > 0; ){
        if(src.data[i] == c)
            return (i32)i;
    }
    return -1;
}
/// Tells whether `src` begins with `fragment`.
/// @return false when fragment is longer than src, otherwise the result of
///         comparing the first fragment.len bytes of src with fragment
bool str_startsWith(str src, str fragment){
    if(fragment.len > src.len)
        return false;

    // view of src limited to the length of the fragment
    str head = src;
    head.len = fragment.len;
    return str_equals(head, fragment);
}
/// Tells whether `src` ends with `fragment`.
/// @return false when fragment is longer than src, otherwise the result of
///         comparing the last fragment.len bytes of src with fragment
bool str_endsWith(str src, str fragment){
    if(fragment.len > src.len)
        return false;

    // view of the last fragment.len bytes of src
    str tail = src;
    tail.data += src.len - fragment.len;
    tail.len = fragment.len;
    return str_equals(tail, fragment);
}
/// Computes a 32-bit hash over the s.len bytes of `s`.
/// Each step is hash = hash * 65599 + byte, which is the same value as
/// (hash << 6) + (hash << 16) - hash + byte in wrapping 32-bit arithmetic.
/// @return the hash, 0 for an empty string
u32 str_hash32(str s){
    const u8* bytes = (const u8*)s.data;
    u32 hash = 0;
    for(u32 i = 0; i < s.len; i++)
        hash = hash * 65599u + bytes[i];
    return hash;
}
/// Returns a copy of `src` (made with str_copy) in which every character
/// accepted by isAlphabeticalLower is shifted by the distance between 'a' and 'A'.
/// The input string is not modified.
str str_toUpper(str src){
    str upper = str_copy(src);
    for(u32 i = 0; i < upper.len; i++){
        char ch = upper.data[i];
        if(isAlphabeticalLower(ch))
            upper.data[i] = ch + ('A' - 'a');
    }
    return upper;
}
/// Returns a copy of `src` (made with str_copy) in which every character
/// accepted by isAlphabeticalUpper is shifted by the distance between 'A' and 'a'.
/// The input string is not modified.
str str_toLower(str src){
    str lower = str_copy(src);
    for(u32 i = 0; i < lower.len; i++){
        char ch = lower.data[i];
        if(isAlphabeticalUpper(ch))
            lower.data[i] = ch + ('a' - 'A');
    }
    return lower;
}

41
src/string/str.h Normal file
View File

@@ -0,0 +1,41 @@
#pragma once
#include "../std.h"
/// String described by a pointer and an explicit length.
/// The struct itself carries no ownership information.
typedef struct str {
// first byte of the string, may be NULL
char* data;
// number of bytes in the string, a terminating '\0' is not counted
u32 len;
// true when data[len] is known to hold '\0'
bool isZeroTerminated;
} str;
/// creates str from a string literal
/// (length is the array size of the literal minus its terminating '\0')
#define STR(LITERAL) str_construct(LITERAL, ARRAY_SIZE(LITERAL) - 1, true)
/// builds a str value from its three fields, nothing is allocated or copied
#define str_construct(DATA, LEN, ZERO_TERMINATED) ((str){ .data = DATA, .len = LEN, .isZeroTerminated = ZERO_TERMINATED })
/// str with NULL data and zero length
static const str str_null = str_construct(NULL, 0, 0);
/// copies src content to new string and adds \0 at the end
str str_copy(str src);
/// compares two strings, NullPtr-friendly
/// (lengths are compared first, data is not read when both lengths are 0)
bool str_equals(str str0, str str1);
/// allocates new string which is reversed variant of <s>
str str_reverse(str s);
/// searches <src> forward for <fragment>, beginning at startIndex
/// @return index where the match begins, or -1 if nothing was found
i32 str_seek(str src, str fragment, u32 startIndex);
/// searches <src> backward for <fragment>, startIndex is the last index of <src> a match may cover
/// @return index where the match begins, or -1 if nothing was found
i32 str_seekReverse(str src, str fragment, u32 startIndex);
/// searches <src> forward for character <c>, beginning at startIndex
/// @return index of the character, or -1 if nothing was found
i32 str_seekChar(str src, char c, u32 startIndex);
/// searches <src> backward for character <c>, beginning at startIndex
/// @return index of the character, or -1 if nothing was found
i32 str_seekCharReverse(str src, char c, u32 startIndex);
/// @return true if the first bytes of <src> are equal to <fragment>
bool str_startsWith(str src, str fragment);
/// @return true if the last bytes of <src> are equal to <fragment>
bool str_endsWith(str src, str fragment);
/// @brief calculates string hash using sdbm32 algorithm (something like lightweight crc32)
/// @return non-cryptographic hash of the string
u32 str_hash32(str s);
/// @return copy of <src> with lowercase letters converted to uppercase
str str_toUpper(str src);
/// @return copy of <src> with uppercase letters converted to lowercase
str str_toLower(str src);