lexer

2024-11-21 09:32:53 +05:00
parent 1cae800a66
commit 1729070b80
12 changed files with 425 additions and 160 deletions
--- a/.vscode/launch.json
+++ b/.vscode/launch.json
@@ -7,7 +7,7 @@
            "request": "launch",
            "program": "${workspaceFolder}/bin/tcpu",
            "windows": { "program": "${workspaceFolder}/bin/tcpu.exe" },
-            "args": [ "--image", "image.bin" ],
+            "args": [ "-c", "s.tasm", "o.bin" ],
            "cwd": "${workspaceFolder}/bin",
            "preLaunchTask": "build_exec_dbg",
            "stopAtEntry": false,
--- a/src/VM/VM.c
+++ b/src/VM/VM.c
@@ -6,7 +6,7 @@ void VM_init(VM* vm){
    vm->state = VMState_Initialized;
 }

-void _VM_setError(VM* vm, const char* context, const char* format, ...){
+void _VM_setError(VM* vm, cstr context, cstr format, ...){
    va_list argv;
    char position_str[32];
    sprintf(position_str, "[at 0x%x][", (u32)vm->current_pos);
@@ -23,7 +23,7 @@ void _VM_setError(VM* vm, const char* context, const char* format, ...){
    vm->state = VMState_InternalError;
 }

-bool VM_loadProgram(VM* vm, u8* data, size_t size){
+bool VM_setMemory(VM* vm, u8* data, size_t size){
    if(data == NULL){
        VM_setError(vm, "data == NULL");
        return false;
@@ -38,7 +38,7 @@ bool VM_loadProgram(VM* vm, u8* data, size_t size){
    return true;
 }

-i32 VM_executeProgram(VM* vm){
+i32 VM_boot(VM* vm){
    if(vm->data == NULL){
        VM_setError(vm, "data == null");
        return -1;
--- a/src/VM/VM.h
+++ b/src/VM/VM.h
@@ -56,15 +56,15 @@ typedef struct VM {
 void VM_init(VM* vm);

 /// @brief Loads a program from the buffer.
-/// @param data buffer with full program code
+/// @param data buffer starting with machine code
 /// @param size size of the program in bytes
-bool VM_loadProgram(VM* vm, u8* data, size_t size);
+bool VM_setMemory(VM* vm, u8* data, size_t size);

 /// @brief Executes the program loaded into VM.
 /// @return program exit code or -1 on error (check vm.error_message)
-i32 VM_executeProgram(VM* vm);
+i32 VM_boot(VM* vm);

 bool VM_dataRead(VM* vm, void* dst, size_t pos, size_t size);

 #define VM_setError(vm, format, ...) _VM_setError(vm, __func__, format ,##__VA_ARGS__)
-void _VM_setError(VM* vm, const char* context, const char* format, ...) __attribute__((__format__(__printf__, 3, 4)));
+void _VM_setError(VM* vm, cstr context, cstr format, ...) __attribute__((__format__(__printf__, 3, 4)));
--- a/src/collections/List.c
+++ b/src/collections/List.c
@@ -0,0 +1,5 @@
+#include "List.h"
+
+List_define(cstr);
+List_define(u32);
+List_define(u8);
--- a/src/collections/List.h
+++ b/src/collections/List.h
@@ -39,3 +39,7 @@
 // sizeof(T) == 2  - padding is 3 of sizeof(T)
 // sizeof(T) == 4  - padding is 1 of sizeof(T)
 #define __List_padding_in_sizeof_T(T) ((8 - sizeof(T) % 8) / sizeof(T) )
+
+List_declare(cstr);
+List_declare(u32);
+List_declare(u8);
--- a/src/compiler/compiler.c
+++ b/src/compiler/compiler.c
@@ -2,25 +2,50 @@

 List_define(Token);

-void _Compiler_setError(Compiler* cmp, const char* context, const char* format, ...){
-    va_list argv;
+/// @brief Translates an index in the code buffer into a 1-based line and column
+/// using the line lengths recorded in cmp->line_lengths.
+/// @param pos index in code buffer
+/// @return (0, 0) if pos is outside of the code or is not covered by the recorded lines
+CodePos Compiler_getLineAndColumn(Compiler* cmp, u32 pos){
+    if(cmp->code_len <= pos)
+        return CodePos_create(0, 0);
+
+    // index of the first character of the line being examined
+    u32 line_begin = 0;
+    u32 line_index = 0;
+    while(line_index < cmp->line_lengths.len){
+        u32 line_end = line_begin + cmp->line_lengths.data[line_index];
+        line_index++;
+        if(pos < line_end)
+            return CodePos_create(line_index, pos + 1 - line_begin);
+        line_begin = line_end;
+    }
+
+    return CodePos_create(0, 0);
+}
+
+/// Records the length of the line that has just ended and restarts column counting.
+static void completeLine(Compiler* cmp){
+    u32 finished_line_len = cmp->column;
+    cmp->column = 0;
+    List_u32_push(&cmp->line_lengths, finished_line_len);
+}
+
+void _Compiler_setError(Compiler* cmp, cstr context, cstr format, ...){ // builds "[at line:column][context] message", stores it in cmp->error_message (a previous message is overwritten, not freed) and sets CompilerState_Error
+    completeLine(cmp); // record the unfinished line so that the position lookup below can find it
+    // pos can be past the last character when the error is raised at the end of file, so it is clamped
+    if(cmp->pos >= cmp->code_len)
+        cmp->pos = cmp->code_len - 1; // NOTE(review): wraps around when code_len == 0 (error raised before any code is loaded); the lookup below then yields 0:0
    char position_str[32];
-    sprintf(position_str, "[at %u:%u][", cmp->line, cmp->column);
+    CodePos code_pos = Compiler_getLineAndColumn(cmp, cmp->pos);
+    sprintf(position_str, "[at %u:%u][", code_pos.line, code_pos.column);
    char* real_format = strcat_malloc(position_str, context, "] ", format);
+    va_list argv;
    va_start(argv, format);
    char* NULLABLE(buf) = vsprintf_malloc(512, real_format, argv);
    va_end(argv);
    free(real_format);
-    if(cmp->error_message == NULL){
-        cmp->error_message = malloc(16);
-        strcpy(cmp->error_message, "SPRINTF FAILED");
+    if(buf == NULL){ // formatting failed, fall back to a fixed message
+        buf = malloc(16);
+        strcpy(buf, "SPRINTF FAILED");
    }
    cmp->state = CompilerState_Error;
    cmp->error_message = buf;
 }


-#define setError(FORMAT, ...) Compiler_setError(cmp, FORMAT, ##__VA_ARGS__);
+#define setError(FORMAT, ...) Compiler_setError(cmp, FORMAT, ##__VA_ARGS__)

 #define returnError(FORMAT, ...) {\
    setError(FORMAT, ##__VA_ARGS__);\
@@ -31,139 +56,331 @@ void _Compiler_setError(Compiler* cmp, const char* context, const char* format,

 #define returnErrorIf_auto(STATEMENT) returnErrorIf(STATEMENT, #STATEMENT)

-#define Error_UnexpectedCharacter(C) "unexpected character '%c'", C
-
-#define Error_endOfFile() "unexpected end of file"
+#define Error_unexpectedCharacter(C) "unexpected character '%c'", C
+#define Error_endOfFile "unexpected end of file"

 void Compiler_init(Compiler* cmp){
    memset(cmp, 0, sizeof(Compiler));
    cmp->state = CompilerState_Initial;
-    cmp->tokens = List_Token_alloc(1024 * 8);
+    cmp->tokens = List_Token_alloc(4096); // initial capacity, presumably in elements - TODO confirm against List_Token_alloc
+    cmp->line_lengths = List_u32_alloc(1024); // receives one entry per source line from completeLine()
 }

 void Compiler_free(Compiler* cmp){
-    if(cmp->tokens.data)
-        free(cmp->tokens.data);
+    free(cmp->code); // source text assigned in Compiler_compileTasm(); still NULL (no-op) if no file has been loaded
+    free(cmp->tokens.data);
+    free(cmp->line_lengths.data); // error_message is not released here, compileSources() in main.c frees it
 }

-static void _recognizeLexema(Compiler* cmp, size_t pos, char c){
-    switch(c){
-        // skip blank characters
-        case ' ': case '\t': case '\r': case '\n':
-            break;
-        // try read comment
-        case '/':
-            cmp->state = CompilerState_ReadingComment;
-            break;
-        // try read label
-        case '.':
-            cmp->state = CompilerState_ReadingLabel;
-            break;
-        default:
-            // try read instruction
-            if(isAlphabeticalL(c) || isAlphabeticalR(c)){
-                cmp->state = CompilerState_ReadingInstruction;
-                break;
-            }
-            else setError(Error_UnexpectedCharacter(c));
-    }
-}
-
-static void _readCommentChar(Compiler* cmp, size_t pos, char c, Token* tok){
-    // begin comment
-    if(tok->type == TokenType_Unset){
-        if(c == '*')
-            tok->type = TokenType_MultiLineComment;
-        else if(c == '/')
-            tok->type = TokenType_SingleLineComment;
-        else {
-            setError(Error_UnexpectedCharacter(c));
+static void readCommentSingleLine(Compiler* cmp){ // reads a "//" comment up to the end of line or end of file and pushes it as one token
+    char c; // on entry cmp->pos points at the second '/'
+    Token tok = Token_construct(TokenType_SingleLineComment, cmp->pos - 1, 0); // the token begins at the first '/'
+    cmp->column++;
+    cmp->pos++;
+    
+    while(cmp->pos < cmp->code_len){
+        c = cmp->code[cmp->pos];
+        // end of line
+        if(c == '\r' || c == '\n'){
+            tok.length = cmp->pos - tok.begin; // both slashes are included, the line ending is not
+            List_Token_push(&cmp->tokens, tok);
+            // the line is completed in lex() when it sees the '\n'
            return;
        }
-        tok->begin = pos;
-        tok->length = 0;
+        
+        cmp->column++;
+        cmp->pos++;
    }
-    // end multi-line comment
-    else if(c == '/' && tok->type == TokenType_SingleLineComment) {
-        if(cmp->code[pos - 1] == '*' && pos - 1 > tok->begin){
-            tok->length = pos - tok->begin - 2;
-        }
-    }
-    // end single-line comment
-    else if(c == '\n' && tok->type == TokenType_SingleLineComment) {
-        tok->length = pos - tok->begin - 1;
-        // remove trailing '\r'
-        if(cmp->code[pos - 1] == '\r')
-            tok->length--;
-        List_Token_push(&cmp->tokens, *tok);
-    }
-    // at the end of file
-    else if(pos == cmp->code_len - 1){
-        // end single-line comment
-        if(tok->type == TokenType_SingleLineComment){
-            tok->length = pos - tok->begin;
-        }
-        // error on unclosed multiline comment
-        else setError(Error_endOfFile());
-    }
-    // do nothing on comment content
+    
+    // end of file
+    tok.length = cmp->pos - tok.begin;
+    List_Token_push(&cmp->tokens, tok);
 }

+/// @brief Reads a multi-line comment and pushes it as a single token that
+/// includes both the opening "/*" and the closing "*/".
+/// On entry cmp->pos points at the '*' of the opening "/*".
+/// On return cmp->pos points at the closing '/', or an error is set
+/// if the end of file is reached before the comment is closed.
+static void readCommentMultiLine(Compiler* cmp){
+    // the token starts at the '/' preceding the current '*'
+    Token tok = Token_construct(TokenType_MultiLineComment, cmp->pos - 1, 0);
+    // the opening '*' must not be reused as a part of the closing "*/" (as in "/*/"),
+    // so the closing '/' can appear at tok.begin + 3 at the earliest (as in "/**/")
+    u32 min_close_pos = tok.begin + 3;
+    cmp->column++;
+    cmp->pos++;
+    
+    while(cmp->pos < cmp->code_len){
+        char c = cmp->code[cmp->pos];
+        // closing comment
+        if(c == '/' && cmp->pos >= min_close_pos && cmp->code[cmp->pos - 1] == '*') {
+            tok.length = cmp->pos - tok.begin + 1;
+            List_Token_push(&cmp->tokens, tok);
+            return;
+        }
+
+        // the comment may span several lines, so line lengths are recorded here too
+        if(c == '\n')
+            completeLine(cmp);
+        cmp->column++;
+        cmp->pos++;
+    }
+    
+    // end of file
+    setError(Error_endOfFile);
+}

-bool _Compiler_lex(Compiler* cmp){
-    cmp->line = 1;
+/// @brief Dispatches to the single-line or multi-line comment reader
+/// depending on the character that follows '/'.
+/// On entry cmp->pos points at '/'. Sets an error if '/' is the last
+/// character of the code or is not followed by '/' or '*'.
+static void readComment(Compiler* cmp){
+    if(cmp->pos + 1 == cmp->code_len){
+        setError(Error_endOfFile);
+        return;
+    }
+
+    char next = cmp->code[cmp->pos + 1];
+    if(next == '\r' || next == '\n'){
+        // a lone '/' at the end of a line: report the '/' itself.
+        // cmp->pos already points at it, so it must not be decremented
+        // (decrementing would wrap around when '/' is the first character of the code)
+        setError(Error_unexpectedCharacter(cmp->code[cmp->pos]));
+        return;
+    }
+
+    // step to the character that selects the kind of comment
+    cmp->pos++;
+    cmp->column++;
+    if(next == '/')
+        readCommentSingleLine(cmp);
+    else if(next == '*')
+        readCommentMultiLine(cmp);
+    else setError(Error_unexpectedCharacter(next));
+}
+
+/// @brief Reads a label name consisting of latin letters and digits.
+/// On entry cmp->pos points at the '.' that starts the label.
+/// The name ends at ':', at the end of the line or at the end of the file;
+/// the pushed token covers the name only, without '.' and ':'.
+static void readLabel(Compiler* cmp){
+    // skip '.'
+    cmp->column++;
+    cmp->pos++;
+    Token tok = Token_construct(TokenType_Label, cmp->pos, 0);
+
+    for(; cmp->pos < cmp->code_len; cmp->pos++, cmp->column++){
+        char c = cmp->code[cmp->pos];
+        bool name_ended = (c == ':' || c == '\r' || c == '\n');
+        if(name_ended){
+            tok.length = cmp->pos - tok.begin;
+            if(tok.length == 0){
+                // empty name: step back to '.' and report it
+                cmp->pos--;
+                setError(Error_unexpectedCharacter(cmp->code[cmp->pos]));
+            }
+            else List_Token_push(&cmp->tokens, tok);
+            // the newline itself is handled in lex()
+            return;
+        }
+
+        bool name_char = isAlphabeticalLower(c) || isAlphabeticalUpper(c) || isDigit(c);
+        if(!name_char){
+            setError(Error_unexpectedCharacter(c));
+            return;
+        }
+    }
+
+    // end of file
+    tok.length = cmp->pos - tok.begin;
+    if(tok.length == 0){
+        setError(Error_endOfFile);
+    }
+    else List_Token_push(&cmp->tokens, tok);
+}
+
+/// Pushes the argument token if it is not empty.
+/// @param end index in the code buffer right after the last character of the argument
+static void pushArgumentIfNotEmpty(Compiler* cmp, Token* tok, u32 end){
+    tok->length = end - tok->begin;
+    if(tok->length > 0)
+        List_Token_push(&cmp->tokens, *tok);
+}
+
+/// @brief Reads the rest of the line as arguments separated by spaces or tabs
+/// and pushes one token per non-empty argument.
+/// On entry cmp->pos points at the blank character that follows the instruction name.
+static void readArguments(Compiler* cmp){
+    // skip the blank character that separates the instruction from its arguments
+    cmp->pos++;
+    cmp->column++;
+    Token tok = Token_construct(TokenType_Argument, cmp->pos, 0);
+
+    for(; cmp->pos < cmp->code_len; cmp->pos++, cmp->column++){
+        switch(cmp->code[cmp->pos]){
+            // end of line, the newline itself is handled in lex()
+            case '\r': case '\n':
+                pushArgumentIfNotEmpty(cmp, &tok, cmp->pos);
+                return;
+            // the current argument ends, the next one may begin after the blank
+            case ' ': case '\t':
+                pushArgumentIfNotEmpty(cmp, &tok, cmp->pos);
+                tok.begin = cmp->pos + 1;
+                break;
+            default:
+                break;
+        }
+    }
+
+    // end of file
+    pushArgumentIfNotEmpty(cmp, &tok, cmp->pos);
+}
+
+/// @brief Reads an instruction name (latin letters and digits) and, if a blank
+/// character follows it on the same line, the arguments of the instruction.
+/// On entry cmp->pos points at the first character of the name.
+static void readInstruction(Compiler* cmp){
+    Token tok = Token_construct(TokenType_Instruction, cmp->pos, 0);
+    // the first character has been checked by lex() already
+    cmp->column++;
+    cmp->pos++;
+
+    bool has_arguments = false;
+    while(cmp->pos < cmp->code_len){
+        char c = cmp->code[cmp->pos];
+        // arguments begin
+        if(c == ' ' || c == '\t'){
+            has_arguments = true;
+            break;
+        }
+        // end of line, the newline itself is handled in lex()
+        if(c == '\r' || c == '\n')
+            break;
+
+        bool name_char = isAlphabeticalLower(c) || isAlphabeticalUpper(c) || isDigit(c);
+        if(!name_char){
+            setError(Error_unexpectedCharacter(c));
+            return;
+        }
+
+        cmp->pos++;
+        cmp->column++;
+    }
+
+    // the name ends at a blank character, at the end of line or at the end of file
+    tok.length = cmp->pos - tok.begin;
+    List_Token_push(&cmp->tokens, tok);
+    if(has_arguments)
+        readArguments(cmp);
+}
+
+static bool lex(Compiler* cmp){ // splits cmp->code into cmp->tokens and records line lengths; returns false if an error has been set
+    returnErrorIf_auto(cmp->state != CompilerState_Initial);
+    cmp->state = CompilerState_Lexing;
    cmp->column = 1;
-    Token tok = Token_construct(TokenType_Unset, 0, 0);
-    for(size_t pos = 0; pos < cmp->code_len; pos++){
-        char c = cmp->code[pos];

-        switch(cmp->state){
-            case CompilerState_Error:
-                return false;
-            case CompilerState_LexemaRecognition:
-                _recognizeLexema(cmp, pos, c);
+    while(cmp->pos < cmp->code_len){
+        char c = cmp->code[cmp->pos]; // the first character of a lexeme selects the reader
+        switch(c){
+            // skip blank characters
+            case ' ': case '\t': case '\r': case '\n':
                break;
-            case CompilerState_ReadingComment:
-                _readCommentChar(cmp, pos, c, &tok);
+            // try read comment
+            case '/':
+                readComment(cmp);
+                break;
+            // try read label
+            case '.':
+                readLabel(cmp);
                break;
            default:
-                returnError("Unexpected compiler state %i", cmp->state);
+                // try read instruction
+                if(isAlphabeticalLower(c) || isAlphabeticalUpper(c)){
+                    readInstruction(cmp);
+                    break;
+                }
+                else returnError(Error_unexpectedCharacter(c));
        }

-        if(c == '\n'){
-            cmp->column = 0;
-            cmp->line++;
-        }
+        if(cmp->state == CompilerState_Error) // the readers report failures through cmp->state
+            return false;
+        
+        c = cmp->code[cmp->pos]; // read again: the readers advance cmp->pos; it may be equal to code_len here, where Compiler_compileTasm() puts a terminating 0
+        if(c == '\n')
+            completeLine(cmp);
        cmp->column++;
+        cmp->pos++;
    }

-    return cmp->state != CompilerState_Error;
-}
-
-bool _Compiler_parse(Compiler* cmp){
-    return true;
-}
-bool _Compiler_compile(Compiler* cmp){
+    completeLine(cmp); // record the length of whatever follows the last '\n'
    return true;
 }

-bool Compiler_compileTasm(Compiler* cmp, const char* restrict code, size_t code_len, char* restrict out_buffer, size_t out_len){
-    returnErrorIf_auto(code == NULL);
-    returnErrorIf_auto(code_len == 0);
-    returnErrorIf_auto(out_buffer == NULL);
-    returnErrorIf_auto(out_len == 0);
-    cmp->code = code;
-    cmp->code_len = code_len;
-    cmp->out_buffer = out_buffer;
-    cmp->out_len = out_len;
+static bool parse(Compiler* cmp){ // parsing stage; for now it only checks and advances cmp->state
+    returnErrorIf_auto(cmp->state != CompilerState_Lexing); // must run right after lex()
+    cmp->state = CompilerState_Parsing;

-    if(!_Compiler_lex(cmp))
-        return false;
-    if(!_Compiler_parse(cmp))
-        return false;
-    if(!_Compiler_compile(cmp))
-        return false;
+    return true; // TODO: the tokens are not processed yet
+}
+static bool compile(Compiler* cmp, FILE* f){ // code generation stage; f is the output file opened by Compiler_compileTasm(), nothing is written to it yet
+    returnErrorIf_auto(cmp->state != CompilerState_Parsing); // must run right after parse()
+    cmp->state = CompilerState_Compiling;
+
+    return true;
+}
+
+/// @brief Reads the whole source file into memory and runs the lexing, parsing
+/// and compilation stages on it. The output file is opened for writing
+/// and passed to the compilation stage.
+/// On success cmp->state is set to CompilerState_Success.
+/// The source text is stored in cmp->code and is released by Compiler_free().
+/// @param debug if true, prints the source code, line lengths and tokens to stdout
+/// @return true if no errors, false if any error occured (check cmp->error_message)
+bool Compiler_compileTasm(Compiler* cmp, cstr source_file_name, cstr out_file_name, bool debug){
+    FILE* f = fopen(source_file_name, "rb");
+    if(f == NULL)
+        returnError("ERROR: can't open file '%s'", source_file_name);
    
-    return true;
+    List_u8 buf = List_u8_alloc(64 * 1024);
+    int ret;
+    while((ret = fgetc(f)) != EOF) {
+        List_u8_push(&buf, ret);
+    }
+    // the error indicator must be read before the stream is closed
+    bool read_failed = ferror(f) != 0;
+    fclose(f);
+    if(read_failed){
+        free(buf.data);
+        returnError("can't read file '%s'", source_file_name);
+    }
+
+    if(buf.len == 0){
+        // the source file has been closed above and must not be closed again
+        free(buf.data);
+        returnError("source file is empty");
+    }
+
+    // terminate the text before taking the pointer: List_u8_push may reallocate
+    // the buffer, which would leave a pointer taken earlier dangling
+    u32 code_len = buf.len;
+    List_u8_push(&buf, 0);
+    cmp->code = (char*)buf.data;
+    cmp->code_len = code_len;
+
+    f = fopen(out_file_name, "wb");
+    if(f == NULL){
+        // the buffer is owned by cmp now and is released by Compiler_free(),
+        // freeing it here would lead to a double free
+        returnError("ERROR: can't open file '%s'", out_file_name);
+    }
+
+    if(debug){
+        printf("----------------------------------[%s]---------------------------------\n", source_file_name);
+        fputs(cmp->code, stdout);
+        fputc('\n', stdout);
+    }
+    
+    bool success = lex(cmp);
+    if(debug){
+        printf("------------------------------------[lines]-----------------------------------\n");
+        for(u32 i = 0; i < cmp->line_lengths.len; i++){
+            printf("[%u] length: %u\n", i+1, cmp->line_lengths.data[i]);
+        }
+        printf("------------------------------------[tokens]-----------------------------------\n");
+        for(u32 i = 0; i < cmp->tokens.len; i++){
+            Token t = cmp->tokens.data[i];
+            CodePos pos = Compiler_getLineAndColumn(cmp, t.begin);
+            // the precision limits the output to the token text,
+            // so tokens of any length are printed without a temporary copy
+            printf("[l:%u, c:%u](pos:%u, size:%u) %s '%.*s'\n", 
+                pos.line, pos.column,
+                t.begin, (u32)t.length,
+                TokenType_toString(t.type), (int)t.length, cmp->code + t.begin);
+        }
+    }
+
+    // every stage runs only if the previous one has succeeded
+    success = success && parse(cmp);
+    success = success && compile(cmp, f);
+    fclose(f);
+    if(success){
+        cmp->state = CompilerState_Success;
+    }
+    
+    return success;
 }
--- a/src/compiler/compiler.h
+++ b/src/compiler/compiler.h
@@ -7,27 +7,22 @@ List_declare(Token);

 typedef enum CompilerState {
    CompilerState_Initial,
-    CompilerState_LexemaRecognition,
-    CompilerState_ReadingComment,
-    CompilerState_ReadingLabel,
-    CompilerState_ReadingInstruction,
-    CompilerState_ReadingArguments,
-    CompilerState_ReadingData,
+    CompilerState_Lexing, // set at the start of lex(), which requires CompilerState_Initial
+    CompilerState_Parsing, // set at the start of parse(), which requires CompilerState_Lexing
+    CompilerState_Compiling, // set at the start of compile(), which requires CompilerState_Parsing
    CompilerState_Error,
    CompilerState_Success
 } CompilerState;

 typedef struct Compiler {
-    const char* code;
-    size_t code_len;
-    char* out_buffer;
-    size_t out_len;
-
-    u32 line;       // > 0 if code parsing started
+    char* code; // source text, assigned in Compiler_compileTasm() and freed by Compiler_free()
+    u32 code_len; // number of characters in code
    u32 column;     // > 0 if code parsing started
-    NULLABLE(char* error_message);
+    u32 pos; // index in code of the character being read
    CompilerState state;
+    NULLABLE(char* error_message); // set by _Compiler_setError()
    List_Token tokens;
+    List_u32 line_lengths; // lengths of the completed lines, used by Compiler_getLineAndColumn()
 } Compiler;

 void Compiler_init(Compiler* cmp);
@@ -35,7 +30,18 @@ void Compiler_free(Compiler* cmp);

 /// @brief compile assembly language code to machine code
 /// @return true if no errors, false if any error occured (check cmp->error_message)
-bool Compiler_compileTasm(Compiler* cmp, const char* restrict code, size_t code_len, char* restrict out_buffer, size_t out_len);
+bool Compiler_compileTasm(Compiler* cmp, cstr source_file_name, cstr out_file_name, bool debug);

 #define Compiler_setError(cmp, format, ...) _Compiler_setError(cmp, __func__, format ,##__VA_ARGS__)
-void _Compiler_setError(Compiler* cmp, const char* context, const char* format, ...) __attribute__((__format__(__printf__, 3, 4)));
+void _Compiler_setError(Compiler* cmp, cstr context, cstr format, ...) __attribute__((__format__(__printf__, 3, 4)));
+
+typedef struct CodePos { // position in the source text as returned by Compiler_getLineAndColumn()
+    u32 line; // 1-based line number, 0 on error
+    u32 column; // 1-based column number, 0 on error
+} CodePos;
+
+#define CodePos_create(L, C) ((CodePos){ .line = L, .column = C })
+
+/// @param pos index in code buffer
+CodePos Compiler_getLineAndColumn(Compiler* cmp, u32 pos);
+
--- a/src/compiler/token.c
+++ b/src/compiler/token.c
@@ -0,0 +1,15 @@
+#include "token.h"
+
+// names of the token types, indexed by TokenType value
+static cstr TokenType_str[] = {
+    "Unset",
+    "SingleLineComment",
+    "MultiLineComment",
+    "Label",
+    "Instruction",
+    "Argument",
+    "Data",
+};
+
+/// @return name of the token type or "Unknown" if t has no entry in the table
+cstr TokenType_toString(TokenType t){
+    size_t count = sizeof(TokenType_str) / sizeof(TokenType_str[0]);
+    // a negative or too large value would read outside of the table
+    if((size_t)t >= count)
+        return "Unknown";
+    return TokenType_str[t];
+}
--- a/src/compiler/token.h
+++ b/src/compiler/token.h
@@ -12,6 +12,8 @@ typedef enum TokenType {
    /* there is a place for 2 values left (TokenType must occupy 4 bits) */
 } TokenType;

+cstr TokenType_toString(TokenType t);
+
 typedef struct Token {
    u32 begin;          // some index in Compiler->code
    u32 length : 28;    // length in characters (28 bits)
--- a/src/instructions/instructions.h
+++ b/src/instructions/instructions.h
@@ -6,7 +6,7 @@
 typedef i32 (*InstructionImplFunc_t)(VM* vm);

 typedef struct Instruction {
-    const char* name;
+    cstr name;
    InstructionImplFunc_t implementation;
 } Instruction;

--- a/src/main.c
+++ b/src/main.c
@@ -1,13 +1,11 @@
 #include "VM/VM.h"
 #include "instructions/instructions.h"
 #include "collections/List.h"
-
-List_declare(cstr);
-List_define(cstr);
+#include "compiler/compiler.h"

 #define arg_is(STR) (strcmp(argv[argi], STR) == 0)

-i32 compileSources(cstr out_file, List_cstr* sources);
+i32 compileSources(cstr source_file, cstr out_file);
 i32 bootFromImage(cstr image_file);

 i32 main(const i32 argc, cstr* argv){
@@ -18,9 +16,10 @@ i32 main(const i32 argc, cstr* argv){

    bool boot = false;
    cstr NULLABLE(image_file) = NULL;
+
    bool compile = false;
    cstr NULLABLE(out_file) = NULL;
-    List_cstr NULLABLE(sources) = List_cstr_construct(NULL, 0, 0);
+    cstr NULLABLE(source_file) = NULL;

    for(i32 argi = 1; argi < argc; argi++){
        if(arg_is("-h") || arg_is("--help")){
@@ -28,7 +27,7 @@ i32 main(const i32 argc, cstr* argv){
                "-h, --help                                 Show this message.\n"
                "-op, --opcodes                             Show list of all instructions.\n"
                "-i, --image [FILE]                         Boot VM using image file.\n"
-                "-c, --compile [OUT_FILE] [SOURCES...]      Compile assembly source files to machine code.\n"
+                "-c, --compile [SOURCE_FILE] [OUT_FILE]     Compile assembly source files to machine code.\n"
            );
            return 0;
        }
@@ -46,28 +45,32 @@ i32 main(const i32 argc, cstr* argv){
                printfe("--image flag is set already\n");
                return 1;
            }
+
+            boot = true;
            if(++argi >= argc){
                printfe("ERROR: no image file specified\n");
                return 1;
            }
            image_file = argv[argi];
-            boot = true;
        }
        else if(arg_is("-c") || arg_is("--compile")){
            if(compile){
                printfe("--compile flag is set already\n");
                return 1;
            }
+
+            compile = true;
+            if(++argi >= argc){
+                printfe("ERROR: no source file file specified\n");
+                return 1;
+            }
+            source_file = argv[argi];
+            
            if(++argi >= argc){
                printfe("ERROR: no output file file specified\n");
                return 1;
            }
            out_file = argv[argi];
-            sources = List_cstr_alloc(argc);
-            compile = true;
-        }
-        else if(compile){
-            List_cstr_push(&sources, argv[argi]);
        }
        else {
            printfe("ERROR: unknown argument '%s'\n", argv[argi]);
@@ -77,7 +80,7 @@ i32 main(const i32 argc, cstr* argv){

    i32 exit_code = 0;
    if(compile){
-        exit_code = compileSources(out_file, &sources);
+        exit_code = compileSources(source_file, out_file);
    }
    if(exit_code == 0 && boot){
        exit_code = bootFromImage(image_file);
@@ -109,15 +112,15 @@ i32 bootFromImage(cstr image_file){
    VM_init(&vm);

    i32 exit_code = 1;
-    if(VM_loadProgram(&vm, vm_memory, bytes_read)){
-        exit_code = VM_executeProgram(&vm);
+    if(VM_setMemory(&vm, vm_memory, bytes_read)){
+        exit_code = VM_boot(&vm);
    }
    if(vm.state == VMState_InternalError){
        if(vm.error_message){
            printfe("VM ERROR: %s\n", vm.error_message);
            free(vm.error_message);
        }
-        else printfe("VM ERROR: unknown error (error_message is null)\n");
+        else printfe("VM ERROR: unknown (error_message is null)\n");
    }
    
    if(exit_code != 0){
@@ -128,6 +131,19 @@ i32 bootFromImage(cstr image_file){
    return exit_code;
 }

-i32 compileSources(cstr out_file, List_cstr* sources){
-    return -1;
+/// @brief Compiles one assembly source file to out_file with debug output enabled.
+/// @return 0 on success, 111 on compilation error (the message is printed with printfe)
+i32 compileSources(cstr source_file, cstr out_file){
+    Compiler cmp;
+    Compiler_init(&cmp);
+    bool compiled = Compiler_compileTasm(&cmp, source_file, out_file, true);
+    // releases the compiler buffers; error_message is not touched by Compiler_free()
+    Compiler_free(&cmp);
+    if(compiled)
+        return 0;
+
+    if(cmp.error_message == NULL){
+        printfe("COMPILER ERROR: unknown (error_message is null)\n");
+        return 111;
+    }
+    printfe("COMPILER ERROR: %s\n", cmp.error_message);
+    free(cmp.error_message);
+    return 111;
 }
--- a/src/std.h
+++ b/src/std.h
@@ -46,12 +46,12 @@ typedef const char* cstr;
 #define NULLABLE(NAME) NAME

 #define strcat_malloc(STR0, ...) _strcat_malloc(count_args(__VA_ARGS__), STR0, __VA_ARGS__)
-char* _strcat_malloc(size_t n, const char* str0, ...);
-char* _vstrcat_malloc(size_t n, const char* str0, va_list argv);
+char* _strcat_malloc(size_t n, cstr str0, ...);
+char* _vstrcat_malloc(size_t n, cstr str0, va_list argv);

-char* NULLABLE(sprintf_malloc)(size_t buffer_size, const char* format, ...) __attribute__((__format__(__printf__, 2, 3)));
-char* NULLABLE(vsprintf_malloc)(size_t buffer_size, const char* format, va_list argv);
+char* NULLABLE(sprintf_malloc)(size_t buffer_size, cstr format, ...) __attribute__((__format__(__printf__, 2, 3)));
+char* NULLABLE(vsprintf_malloc)(size_t buffer_size, cstr format, va_list argv);

-static inline bool isAlphabeticalL(char c) { return c - 'a' <= 'z'; }
-static inline bool isAlphabeticalR(char c) { return c - 'A' <= 'Z'; }
-static inline bool isDigit(char c) { return c - '0' <= '9'; }
+/// @return true if c is a lowercase latin letter
+static inline bool isAlphabeticalLower(char c) { return c >= 'a' && c <= 'z'; }
+/// @return true if c is an uppercase latin letter
+static inline bool isAlphabeticalUpper(char c) { return c >= 'A' && c <= 'Z'; }
+/// @return true if c is a decimal digit
+static inline bool isDigit(char c) { return c >= '0' && c <= '9'; }