From 1729070b80e0a128202db0bd9b367a5269f05d2f Mon Sep 17 00:00:00 2001
From: Timerix <timerix11@gmail.com>
Date: Thu, 21 Nov 2024 09:32:53 +0500
Subject: [PATCH] lexer

---
 .vscode/launch.json             |   2 +-
 src/VM/VM.c                     |   6 +-
 src/VM/VM.h                     |   8 +-
 src/collections/List.c          |   5 +
 src/collections/List.h          |   4 +
 src/compiler/compiler.c         | 439 ++++++++++++++++++++++++--------
 src/compiler/compiler.h         |  36 +--
 src/compiler/token.c            |  15 ++
 src/compiler/token.h            |   2 +
 src/instructions/instructions.h |   2 +-
 src/main.c                      |  52 ++--
 src/std.h                       |  14 +-
 12 files changed, 425 insertions(+), 160 deletions(-)
 create mode 100644 src/collections/List.c
 create mode 100644 src/compiler/token.c

diff --git a/.vscode/launch.json b/.vscode/launch.json
index 721cf5f..968b3b1 100644
--- a/.vscode/launch.json
+++ b/.vscode/launch.json
@@ -7,7 +7,7 @@
             "request": "launch",
             "program": "${workspaceFolder}/bin/tcpu",
             "windows": { "program": "${workspaceFolder}/bin/tcpu.exe" },
-            "args": [ "--image", "image.bin" ],
+            "args": [ "-c", "s.tasm", "o.bin" ],
             "cwd": "${workspaceFolder}/bin",
             "preLaunchTask": "build_exec_dbg",
             "stopAtEntry": false,
diff --git a/src/VM/VM.c b/src/VM/VM.c
index cc9d175..b505cf2 100644
--- a/src/VM/VM.c
+++ b/src/VM/VM.c
@@ -6,7 +6,7 @@ void VM_init(VM* vm){
     vm->state = VMState_Initialized;
 }
 
-void _VM_setError(VM* vm, const char* context, const char* format, ...){
+void _VM_setError(VM* vm, cstr context, cstr format, ...){
     va_list argv;
     char position_str[32];
     sprintf(position_str, "[at 0x%x][", (u32)vm->current_pos);
@@ -23,7 +23,7 @@ void _VM_setError(VM* vm, const char* context, const char* format, ...){
     vm->state = VMState_InternalError;
 }
 
-bool VM_loadProgram(VM* vm, u8* data, size_t size){
+bool VM_setMemory(VM* vm, u8* data, size_t size){
     if(data == NULL){
         VM_setError(vm, "data == NULL");
         return false;
@@ -38,7 +38,7 @@ bool VM_loadProgram(VM* vm, u8* data, size_t size){
     return true;
 }
 
-i32 VM_executeProgram(VM* vm){
+i32 VM_boot(VM* vm){
     if(vm->data == NULL){
         VM_setError(vm, "data == null");
         return -1;
diff --git a/src/VM/VM.h b/src/VM/VM.h
index 889e1f8..19802c7 100644
--- a/src/VM/VM.h
+++ b/src/VM/VM.h
@@ -56,15 +56,15 @@ typedef struct VM {
 void VM_init(VM* vm);
 
 /// @brief Loads a program from the buffer.
-/// @param data buffer with full program code
+/// @param data buffer starting with machine code
 /// @param size size of the program in bytes
-bool VM_loadProgram(VM* vm, u8* data, size_t size);
+bool VM_setMemory(VM* vm, u8* data, size_t size);
 
 /// @brief Executes the program loaded into VM.
 /// @return program exit code or -1 on error (check vm.error_message)
-i32 VM_executeProgram(VM* vm);
+i32 VM_boot(VM* vm);
 
 bool VM_dataRead(VM* vm, void* dst, size_t pos, size_t size);
 
 #define VM_setError(vm, format, ...) _VM_setError(vm, __func__, format ,##__VA_ARGS__)
-void _VM_setError(VM* vm, const char* context, const char* format, ...) __attribute__((__format__(__printf__, 3, 4)));
+void _VM_setError(VM* vm, cstr context, cstr format, ...) __attribute__((__format__(__printf__, 3, 4)));
diff --git a/src/collections/List.c b/src/collections/List.c
new file mode 100644
index 0000000..6b610a9
--- /dev/null
+++ b/src/collections/List.c
@@ -0,0 +1,5 @@
+#include "List.h"
+
+List_define(cstr);
+List_define(u32);
+List_define(u8);
diff --git a/src/collections/List.h b/src/collections/List.h
index 24342ca..b5adcad 100644
--- a/src/collections/List.h
+++ b/src/collections/List.h
@@ -39,3 +39,7 @@
 // sizeof(T) == 2  - padding is 3 of sizeof(T)
 // sizeof(T) == 4  - padding is 1 of sizeof(T)
 #define __List_padding_in_sizeof_T(T) ((8 - sizeof(T) % 8) / sizeof(T) )
+
+List_declare(cstr);
+List_declare(u32);
+List_declare(u8);
diff --git a/src/compiler/compiler.c b/src/compiler/compiler.c
index 920ed62..49efab5 100644
--- a/src/compiler/compiler.c
+++ b/src/compiler/compiler.c
@@ -2,25 +2,50 @@
 
 List_define(Token);
 
-void _Compiler_setError(Compiler* cmp, const char* context, const char* format, ...){
-    va_list argv;
+CodePos Compiler_getLineAndColumn(Compiler* cmp, u32 pos){ // maps an index in cmp->code to a 1-based line:column using cmp->line_lengths
+    u32 prev_lines_len = 0; // total length of the lines already walked past
+    if(pos >= cmp->code_len)
+        return CodePos_create(0, 0); // index outside the code buffer: (0, 0) is the error value
+    // find the first recorded line that ends beyond pos
+    for(u32 i = 0; i < cmp->line_lengths.len; i++){
+        u32 line_len = cmp->line_lengths.data[i];
+        if(prev_lines_len + line_len > pos)
+            return CodePos_create(i + 1, pos + 1 - prev_lines_len); // line and column are both reported 1-based
+        prev_lines_len += line_len;
+    }
+    // pos lies beyond every line recorded so far
+    return CodePos_create(0, 0);
+}
+
+static void completeLine(Compiler* cmp){ // records the finished line and resets the column counter
+    List_u32_push(&cmp->line_lengths, cmp->column); // the current column value is stored as the line's length
+    cmp->column = 0;
+}
+
+void _Compiler_setError(Compiler* cmp, cstr context, cstr format, ...){
+    completeLine(cmp);
+    // happens at the end of file
+    if(cmp->pos >= cmp->code_len)
+        cmp->pos = cmp->code_len - 1;
     char position_str[32];
-    sprintf(position_str, "[at %u:%u][", cmp->line, cmp->column);
+    CodePos code_pos = Compiler_getLineAndColumn(cmp, cmp->pos);
+    sprintf(position_str, "[at %u:%u][", code_pos.line, code_pos.column);
     char* real_format = strcat_malloc(position_str, context, "] ", format);
+    va_list argv;
     va_start(argv, format);
     char* NULLABLE(buf) = vsprintf_malloc(512, real_format, argv);
     va_end(argv);
     free(real_format);
-    if(cmp->error_message == NULL){
-        cmp->error_message = malloc(16);
-        strcpy(cmp->error_message, "SPRINTF FAILED");
+    if(buf == NULL){
+        buf = malloc(16);
+        strcpy(buf, "SPRINTF FAILED");
     }
     cmp->state = CompilerState_Error;
     cmp->error_message = buf;
 }
 
 
-#define setError(FORMAT, ...) Compiler_setError(cmp, FORMAT, ##__VA_ARGS__);
+#define setError(FORMAT, ...) Compiler_setError(cmp, FORMAT, ##__VA_ARGS__)
 
 #define returnError(FORMAT, ...) {\
     setError(FORMAT, ##__VA_ARGS__);\
@@ -31,139 +56,331 @@ void _Compiler_setError(Compiler* cmp, const char* context, const char* format,
 
 #define returnErrorIf_auto(STATEMENT) returnErrorIf(STATEMENT, #STATEMENT)
 
-#define Error_UnexpectedCharacter(C) "unexpected character '%c'", C
-
-#define Error_endOfFile() "unexpected end of file"
+#define Error_unexpectedCharacter(C) "unexpected character '%c'", C
+#define Error_endOfFile "unexpected end of file"
 
 void Compiler_init(Compiler* cmp){
     memset(cmp, 0, sizeof(Compiler));
     cmp->state = CompilerState_Initial;
-    cmp->tokens = List_Token_alloc(1024 * 8);
+    cmp->tokens = List_Token_alloc(4096);
+    cmp->line_lengths = List_u32_alloc(1024);
 }
 
 void Compiler_free(Compiler* cmp){
-    if(cmp->tokens.data)
-        free(cmp->tokens.data);
+    free(cmp->code);
+    free(cmp->tokens.data);
+    free(cmp->line_lengths.data);
 }
 
-static void _recognizeLexema(Compiler* cmp, size_t pos, char c){
-    switch(c){
-        // skip blank characters
-        case ' ': case '\t': case '\r': case '\n':
-            break;
-        // try read comment
-        case '/':
-            cmp->state = CompilerState_ReadingComment;
-            break;
-        // try read label
-        case '.':
-            cmp->state = CompilerState_ReadingLabel;
-            break;
-        default:
-            // try read instruction
-            if(isAlphabeticalL(c) || isAlphabeticalR(c)){
-                cmp->state = CompilerState_ReadingInstruction;
-                break;
-            }
-            else setError(Error_UnexpectedCharacter(c));
-    }
-}
-
-static void _readCommentChar(Compiler* cmp, size_t pos, char c, Token* tok){
-    // begin comment
-    if(tok->type == TokenType_Unset){
-        if(c == '*')
-            tok->type = TokenType_MultiLineComment;
-        else if(c == '/')
-            tok->type = TokenType_SingleLineComment;
-        else {
-            setError(Error_UnexpectedCharacter(c));
+static void readCommentSingleLine(Compiler* cmp){
+    char c; // '/'
+    Token tok = Token_construct(TokenType_SingleLineComment, cmp->pos - 1, 0);
+    cmp->column++;
+    cmp->pos++;
+    
+    while(cmp->pos < cmp->code_len){
+        c = cmp->code[cmp->pos];
+        // end of line
+        if(c == '\r' || c == '\n'){
+            tok.length = cmp->pos - tok.begin;
+            List_Token_push(&cmp->tokens, tok);
+            // cmp->line will be increased in lex()
             return;
         }
-        tok->begin = pos;
-        tok->length = 0;
+        
+        cmp->column++;
+        cmp->pos++;
     }
-    // end multi-line comment
-    else if(c == '/' && tok->type == TokenType_SingleLineComment) {
-        if(cmp->code[pos - 1] == '*' && pos - 1 > tok->begin){
-            tok->length = pos - tok->begin - 2;
-        }
-    }
-    // end single-line comment
-    else if(c == '\n' && tok->type == TokenType_SingleLineComment) {
-        tok->length = pos - tok->begin - 1;
-        // remove trailing '\r'
-        if(cmp->code[pos - 1] == '\r')
-            tok->length--;
-        List_Token_push(&cmp->tokens, *tok);
-    }
-    // at the end of file
-    else if(pos == cmp->code_len - 1){
-        // end single-line comment
-        if(tok->type == TokenType_SingleLineComment){
-            tok->length = pos - tok->begin;
-        }
-        // error on unclosed multiline comment
-        else setError(Error_endOfFile());
-    }
-    // do nothing on comment content
+    
+    // end of file
+    tok.length = cmp->pos - tok.begin;
+    List_Token_push(&cmp->tokens, tok);
 }
 
+static void readCommentMultiLine(Compiler* cmp){ // reads a "/* ... */" token; on entry cmp->pos is at the '*' of the opening "/*"
+    char c; // current character
+    Token tok = Token_construct(TokenType_MultiLineComment, cmp->pos - 1, 0); // the token begins at the opening '/'
+    cmp->column++;
+    cmp->pos++;
+    // cmp->pos is now tok.begin + 2, the first character after "/*"
+    while(cmp->pos < cmp->code_len){
+        c = cmp->code[cmp->pos];
+        // closing "*/": its '*' must not be the '*' of the opening "/*", so the '/' sits at tok.begin + 3 or later (accepts "/**/", rejects "/*/")
+        if(cmp->pos > tok.begin + 2 && c == '/' && cmp->code[cmp->pos - 1] == '*') {
+            tok.length = cmp->pos - tok.begin + 1;
+            List_Token_push(&cmp->tokens, tok);
+            return;
+        }
 
+        if(c == '\n')
+            completeLine(cmp);
+        cmp->column++;
+        cmp->pos++;
+    }
+    
+    // end of file reached without a closing "*/"
+    setError(Error_endOfFile);
+}
 
-bool _Compiler_lex(Compiler* cmp){
-    cmp->line = 1;
+static void readComment(Compiler* cmp){ // on entry cmp->pos is at '/'; dispatches to the "//" or "/*" reader, anything else is an error
+    char c; // character following the '/'
+    if(cmp->pos + 1 == cmp->code_len){
+        setError(Error_endOfFile);
+        return;
+    }
+
+    c = cmp->code[cmp->pos + 1];
+    if(c == '\r' || c == '\n'){ // lone '/' at end of line: report the '/' itself; cmp->pos still points at it, so it must not be stepped back
+        setError(Error_unexpectedCharacter(cmp->code[cmp->pos]));
+        return;
+    }
+
+    cmp->pos++;
+    cmp->column++;
+    if(c == '/')
+        readCommentSingleLine(cmp);
+    else if(c == '*')
+        readCommentMultiLine(cmp);
+    else setError(Error_unexpectedCharacter(c));
+}
+
+static void readLabel(Compiler* cmp){ // reads a label name; on entry cmp->pos is at the leading '.'
+    char c; // current character
+    cmp->pos++;
+    cmp->column++;
+    Token tok = Token_construct(TokenType_Label, cmp->pos, 0); // the token starts after the '.'
+    
+    while(cmp->pos < cmp->code_len){
+        c = cmp->code[cmp->pos];
+        // end of the label: ':' or end of line
+        if(c == ':' || c == '\r' || c == '\n'){
+            tok.length = cmp->pos - tok.begin;
+            if(tok.length > 0)
+                List_Token_push(&cmp->tokens, tok);
+            else setError(Error_unexpectedCharacter(cmp->code[--cmp->pos])); // empty name: step back and report the '.'
+            // the terminating character is left for lex(), which completes the line when it sees '\n'
+            return;
+        }
+
+        if(!isAlphabeticalLower(c) && !isAlphabeticalUpper(c) && !isDigit(c)){ // a name may contain only letters and digits
+            setError(Error_unexpectedCharacter(c));
+            return;
+        }
+        
+        cmp->column++;
+        cmp->pos++;
+    }
+    
+    // end of file: push the label if it is not empty
+    tok.length = cmp->pos - tok.begin;
+    if(tok.length > 0)
+        List_Token_push(&cmp->tokens, tok);
+    else setError(Error_endOfFile);
+}
+
+static void readArguments(Compiler* cmp){ // splits the rest of the line into Argument tokens separated by spaces or tabs
+    char c; // current character; on entry cmp->pos is at the blank that ended the instruction name
+    cmp->pos++;
+    cmp->column++;
+    Token tok = Token_construct(TokenType_Argument, cmp->pos, 0);
+    
+    while(cmp->pos < cmp->code_len){
+        c = cmp->code[cmp->pos];
+        // end of line
+        if(c == '\r' || c == '\n'){
+            tok.length = cmp->pos - tok.begin;
+            if(tok.length > 0)
+                List_Token_push(&cmp->tokens, tok);
+            // the line terminator is left for lex(), which completes the line when it sees '\n'
+            return;
+        }
+
+        // a blank ends the current argument; the next one may start right after it
+        if(c == ' ' || c == '\t'){
+            tok.length = cmp->pos - tok.begin;
+            if(tok.length > 0) // consecutive blanks produce no empty tokens
+                List_Token_push(&cmp->tokens, tok);
+            tok.begin = cmp->pos + 1;
+        }
+        
+
+        cmp->column++;
+        cmp->pos++;
+    }
+    
+    // end of file: push the last argument if it is not empty
+    tok.length = cmp->pos - tok.begin;
+    if(tok.length > 0)
+        List_Token_push(&cmp->tokens, tok);
+}
+
+static void readInstruction(Compiler* cmp){ // reads an instruction name and, if a blank follows it, the arguments on the same line
+    Token tok = Token_construct(TokenType_Instruction, cmp->pos, 0); // the token starts at the first letter, already checked by lex()
+    cmp->pos++;
+    cmp->column++;
+    
+    while(cmp->pos < cmp->code_len){
+        char c = cmp->code[cmp->pos];
+        // end of line
+        if(c == '\r' || c == '\n'){
+            tok.length = cmp->pos - tok.begin;
+            List_Token_push(&cmp->tokens, tok);
+            // the line terminator is left for lex(), which completes the line when it sees '\n'
+            return;
+        }
+
+        // arguments begin
+        if(c == ' ' || c == '\t'){
+            tok.length = cmp->pos - tok.begin;
+            List_Token_push(&cmp->tokens, tok);
+            readArguments(cmp);
+            return;
+        }
+
+        if(!isAlphabeticalLower(c) && !isAlphabeticalUpper(c) && !isDigit(c)){ // a name may contain only letters and digits
+            setError(Error_unexpectedCharacter(c));
+            return;
+        }
+
+        cmp->column++;
+        cmp->pos++;
+    }
+    
+    // end of file: the name runs up to the end of the code
+    tok.length = cmp->pos - tok.begin;
+    List_Token_push(&cmp->tokens, tok);
+}
+
+static bool lex(Compiler* cmp){
+    returnErrorIf_auto(cmp->state != CompilerState_Initial);
+    cmp->state = CompilerState_Lexing;
     cmp->column = 1;
-    Token tok = Token_construct(TokenType_Unset, 0, 0);
-    for(size_t pos = 0; pos < cmp->code_len; pos++){
-        char c = cmp->code[pos];
 
-        switch(cmp->state){
-            case CompilerState_Error:
-                return false;
-            case CompilerState_LexemaRecognition:
-                _recognizeLexema(cmp, pos, c);
+    while(cmp->pos < cmp->code_len){
+        char c = cmp->code[cmp->pos];
+        switch(c){
+            // skip blank characters
+            case ' ': case '\t': case '\r': case '\n':
                 break;
-            case CompilerState_ReadingComment:
-                _readCommentChar(cmp, pos, c, &tok);
+            // try read comment
+            case '/':
+                readComment(cmp);
+                break;
+            // try read label
+            case '.':
+                readLabel(cmp);
                 break;
             default:
-                returnError("Unexpected compiler state %i", cmp->state);
+                // try read instruction
+                if(isAlphabeticalLower(c) || isAlphabeticalUpper(c)){
+                    readInstruction(cmp);
+                    break;
+                }
+                else returnError(Error_unexpectedCharacter(c));
         }
 
-        if(c == '\n'){
-            cmp->column = 0;
-            cmp->line++;
-        }
+        if(cmp->state == CompilerState_Error)
+            return false;
+        
+        c = cmp->code[cmp->pos];
+        if(c == '\n')
+            completeLine(cmp);
         cmp->column++;
+        cmp->pos++;
     }
 
-    return cmp->state != CompilerState_Error;
-}
-
-bool _Compiler_parse(Compiler* cmp){
-    return true;
-}
-bool _Compiler_compile(Compiler* cmp){
+    completeLine(cmp);
     return true;
 }
 
-bool Compiler_compileTasm(Compiler* cmp, const char* restrict code, size_t code_len, char* restrict out_buffer, size_t out_len){
-    returnErrorIf_auto(code == NULL);
-    returnErrorIf_auto(code_len == 0);
-    returnErrorIf_auto(out_buffer == NULL);
-    returnErrorIf_auto(out_len == 0);
-    cmp->code = code;
-    cmp->code_len = code_len;
-    cmp->out_buffer = out_buffer;
-    cmp->out_len = out_len;
+static bool parse(Compiler* cmp){
+    returnErrorIf_auto(cmp->state != CompilerState_Lexing);
+    cmp->state = CompilerState_Parsing;
 
-    if(!_Compiler_lex(cmp))
-        return false;
-    if(!_Compiler_parse(cmp))
-        return false;
-    if(!_Compiler_compile(cmp))
-        return false;
+    return true;
+}
+static bool compile(Compiler* cmp, FILE* f){
+    returnErrorIf_auto(cmp->state != CompilerState_Parsing);
+    cmp->state = CompilerState_Compiling;
+
+    return true;
+}
+
+bool Compiler_compileTasm(Compiler* cmp, cstr source_file_name, cstr out_file_name, bool debug){ // loads the source file into cmp->code, opens the output file and runs lex/parse/compile; false on error
+    FILE* f = fopen(source_file_name, "rb");
+    if(f == NULL)
+        returnError("ERROR: can't open file '%s'", source_file_name);
     
-    return true;
+    List_u8 buf = List_u8_alloc(64 * 1024);
+    int ret;
+    while((ret = fgetc(f)) != EOF) {
+        List_u8_push(&buf, ret);
+    }
+    if(ferror(f)){
+        free(buf.data);
+        fclose(f);
+        returnError("can't read file '%s'", source_file_name);
+    }
+    fclose(f);
+
+    if(buf.len == 0){
+        free(buf.data);
+        // the source file was already closed above; closing the same stream twice is undefined behavior
+        returnError("source file is empty");
+    }
+
+    // append the terminator BEFORE taking the data pointer, in case the push reallocates buf.data
+    List_u8_push(&buf, 0);
+    cmp->code = (char*)buf.data;
+    cmp->code_len = buf.len - 1; // the terminator is not counted as code
+    f = fopen(out_file_name, "wb");
+    if(f == NULL){
+        // the buffer is owned by cmp->code now and is released by Compiler_free(); freeing it here too would be a double free
+        returnError("ERROR: can't open file '%s'", out_file_name);
+    }
+
+    if(debug){
+        printf("----------------------------------[%s]---------------------------------\n", source_file_name);
+        fputs(cmp->code, stdout);
+        fputc('\n', stdout);
+    }
+    
+    bool success = lex(cmp);
+    if(debug){
+        printf("------------------------------------[lines]-----------------------------------\n");
+        for(u32 i = 0; i < cmp->line_lengths.len; i++){
+            printf("[%u] length: %u\n", i+1, cmp->line_lengths.data[i]);
+        }
+        printf("------------------------------------[tokens]-----------------------------------\n");
+        for(u32 i = 0; i < cmp->tokens.len; i++){
+            Token t = cmp->tokens.data[i];
+            CodePos pos = Compiler_getLineAndColumn(cmp, t.begin);
+            // the token text is printed straight from the code buffer with a
+            // length-limited "%.*s", so a token of any length is safe
+            // (copying it into a fixed 4096-byte buffer overflowed on long tokens)
+            printf("[l:%u, c:%u](pos:%u, size:%u) %s '%.*s'\n",
+                pos.line, pos.column,
+                t.begin, t.length,
+                TokenType_toString(t.type),
+                (int)t.length, cmp->code + t.begin);
+        }
+    }
+    if(!success){
+        fclose(f);
+        return false;
+    }
+
+    success = parse(cmp);
+    if(!success){
+        fclose(f);
+        return false;
+    }
+
+    success = compile(cmp, f);
+    fclose(f);
+    if(success){
+        cmp->state = CompilerState_Success;
+    }
+    
+    return success;
 }
diff --git a/src/compiler/compiler.h b/src/compiler/compiler.h
index f83b689..3de4227 100644
--- a/src/compiler/compiler.h
+++ b/src/compiler/compiler.h
@@ -7,27 +7,22 @@ List_declare(Token);
 
 typedef enum CompilerState {
     CompilerState_Initial,
-    CompilerState_LexemaRecognition,
-    CompilerState_ReadingComment,
-    CompilerState_ReadingLabel,
-    CompilerState_ReadingInstruction,
-    CompilerState_ReadingArguments,
-    CompilerState_ReadingData,
+    CompilerState_Lexing,
+    CompilerState_Parsing,
+    CompilerState_Compiling,
     CompilerState_Error,
     CompilerState_Success
 } CompilerState;
 
 typedef struct Compiler {
-    const char* code;
-    size_t code_len;
-    char* out_buffer;
-    size_t out_len;
-
-    u32 line;       // > 0 if code parsing started
+    char* code;
+    u32 code_len;
     u32 column;     // > 0 if code parsing started
-    NULLABLE(char* error_message);
+    u32 pos;
     CompilerState state;
+    NULLABLE(char* error_message);
     List_Token tokens;
+    List_u32 line_lengths;
 } Compiler;
 
 void Compiler_init(Compiler* cmp);
@@ -35,7 +30,18 @@ void Compiler_free(Compiler* cmp);
 
 /// @brief compile assembly language code to machine code
 /// @return true if no errors, false if any error occured (check cmp->error_message)
-bool Compiler_compileTasm(Compiler* cmp, const char* restrict code, size_t code_len, char* restrict out_buffer, size_t out_len);
+bool Compiler_compileTasm(Compiler* cmp, cstr source_file_name, cstr out_file_name, bool debug);
 
 #define Compiler_setError(cmp, format, ...) _Compiler_setError(cmp, __func__, format ,##__VA_ARGS__)
-void _Compiler_setError(Compiler* cmp, const char* context, const char* format, ...) __attribute__((__format__(__printf__, 3, 4)));
+void _Compiler_setError(Compiler* cmp, cstr context, cstr format, ...) __attribute__((__format__(__printf__, 3, 4)));
+
+typedef struct CodePos {
+    u32 line; // 0 on error
+    u32 column; // 0 on error
+} CodePos;
+
+#define CodePos_create(L, C) ((CodePos){ .line = L, .column = C })
+
+/// @param pos index in code buffer
+CodePos Compiler_getLineAndColumn(Compiler* cmp, u32 pos);
+
diff --git a/src/compiler/token.c b/src/compiler/token.c
new file mode 100644
index 0000000..bc4f836
--- /dev/null
+++ b/src/compiler/token.c
@@ -0,0 +1,15 @@
+#include "token.h"
+
+static cstr TokenType_str[] = { // display names indexed by TokenType value; presumably in enum declaration order - verify against token.h
+    "Unset",
+    "SingleLineComment",
+    "MultiLineComment",
+    "Label",
+    "Instruction",
+    "Argument",
+    "Data",
+};
+// returns the display name of t, or "Unknown" when t has no entry in the table (instead of reading past its end)
+cstr TokenType_toString(TokenType t){
+    return (u32)t < sizeof(TokenType_str) / sizeof(TokenType_str[0]) ? TokenType_str[t] : "Unknown";
+}
diff --git a/src/compiler/token.h b/src/compiler/token.h
index 36ae24b..7ee5dde 100644
--- a/src/compiler/token.h
+++ b/src/compiler/token.h
@@ -12,6 +12,8 @@ typedef enum TokenType {
     /* there is a place for 2 values left (TokenType must occupy 4 bits) */
 } TokenType;
 
+cstr TokenType_toString(TokenType t);
+
 typedef struct Token {
     u32 begin;          // some index in Compiler->code
     u32 length : 28;    // length in characters (28 bits)
diff --git a/src/instructions/instructions.h b/src/instructions/instructions.h
index fc1f693..ba670b2 100644
--- a/src/instructions/instructions.h
+++ b/src/instructions/instructions.h
@@ -6,7 +6,7 @@
 typedef i32 (*InstructionImplFunc_t)(VM* vm);
 
 typedef struct Instruction {
-    const char* name;
+    cstr name;
     InstructionImplFunc_t implementation;
 } Instruction;
 
diff --git a/src/main.c b/src/main.c
index 741f790..65382cd 100644
--- a/src/main.c
+++ b/src/main.c
@@ -1,13 +1,11 @@
 #include "VM/VM.h"
 #include "instructions/instructions.h"
 #include "collections/List.h"
-
-List_declare(cstr);
-List_define(cstr);
+#include "compiler/compiler.h"
 
 #define arg_is(STR) (strcmp(argv[argi], STR) == 0)
 
-i32 compileSources(cstr out_file, List_cstr* sources);
+i32 compileSources(cstr source_file, cstr out_file);
 i32 bootFromImage(cstr image_file);
 
 i32 main(const i32 argc, cstr* argv){
@@ -18,9 +16,10 @@ i32 main(const i32 argc, cstr* argv){
 
     bool boot = false;
     cstr NULLABLE(image_file) = NULL;
+
     bool compile = false;
     cstr NULLABLE(out_file) = NULL;
-    List_cstr NULLABLE(sources) = List_cstr_construct(NULL, 0, 0);
+    cstr NULLABLE(source_file) = NULL;
 
     for(i32 argi = 1; argi < argc; argi++){
         if(arg_is("-h") || arg_is("--help")){
@@ -28,7 +27,7 @@ i32 main(const i32 argc, cstr* argv){
                 "-h, --help                                 Show this message.\n"
                 "-op, --opcodes                             Show list of all instructions.\n"
                 "-i, --image [FILE]                         Boot VM using image file.\n"
-                "-c, --compile [OUT_FILE] [SOURCES...]      Compile assembly source files to machine code.\n"
+                "-c, --compile [SOURCE_FILE] [OUT_FILE]     Compile assembly source files to machine code.\n"
             );
             return 0;
         }
@@ -46,28 +45,32 @@ i32 main(const i32 argc, cstr* argv){
                 printfe("--image flag is set already\n");
                 return 1;
             }
+
+            boot = true;
             if(++argi >= argc){
                 printfe("ERROR: no image file specified\n");
                 return 1;
             }
             image_file = argv[argi];
-            boot = true;
         }
         else if(arg_is("-c") || arg_is("--compile")){
             if(compile){
                 printfe("--compile flag is set already\n");
                 return 1;
             }
+
+            compile = true;
+            if(++argi >= argc){
+                printfe("ERROR: no source file file specified\n");
+                return 1;
+            }
+            source_file = argv[argi];
+            
             if(++argi >= argc){
                 printfe("ERROR: no output file file specified\n");
                 return 1;
             }
             out_file = argv[argi];
-            sources = List_cstr_alloc(argc);
-            compile = true;
-        }
-        else if(compile){
-            List_cstr_push(&sources, argv[argi]);
         }
         else {
             printfe("ERROR: unknown argument '%s'\n", argv[argi]);
@@ -77,7 +80,7 @@ i32 main(const i32 argc, cstr* argv){
 
     i32 exit_code = 0;
     if(compile){
-        exit_code = compileSources(out_file, &sources);
+        exit_code = compileSources(source_file, out_file);
     }
     if(exit_code == 0 && boot){
         exit_code = bootFromImage(image_file);
@@ -109,15 +112,15 @@ i32 bootFromImage(cstr image_file){
     VM_init(&vm);
 
     i32 exit_code = 1;
-    if(VM_loadProgram(&vm, vm_memory, bytes_read)){
-        exit_code = VM_executeProgram(&vm);
+    if(VM_setMemory(&vm, vm_memory, bytes_read)){
+        exit_code = VM_boot(&vm);
     }
     if(vm.state == VMState_InternalError){
         if(vm.error_message){
             printfe("VM ERROR: %s\n", vm.error_message);
             free(vm.error_message);
         }
-        else printfe("VM ERROR: unknown error (error_message is null)\n");
+        else printfe("VM ERROR: unknown (error_message is null)\n");
     }
     
     if(exit_code != 0){
@@ -128,6 +131,19 @@ i32 bootFromImage(cstr image_file){
     return exit_code;
 }
 
-i32 compileSources(cstr out_file, List_cstr* sources){
-    return -1;
+i32 compileSources(cstr source_file, cstr out_file){ // compiles one source file into out_file; returns 0 on success, 111 on failure
+    Compiler cmp;
+    Compiler_init(&cmp);
+    bool success = Compiler_compileTasm(&cmp, source_file, out_file, true); // debug output is always enabled here
+    Compiler_free(&cmp); // as defined in this patch, this does not release error_message, so it is still readable below
+    if(!success){
+        if(cmp.error_message){
+            printfe("COMPILER ERROR: %s\n", cmp.error_message);
+            free(cmp.error_message); // the message is freed by the caller
+        }
+        else printfe("COMPILER ERROR: unknown (error_message is null)\n");
+        return 111;
+    }
+
+    return 0;
 }
diff --git a/src/std.h b/src/std.h
index 29313a1..dbdb070 100644
--- a/src/std.h
+++ b/src/std.h
@@ -46,12 +46,12 @@ typedef const char* cstr;
 #define NULLABLE(NAME) NAME
 
 #define strcat_malloc(STR0, ...) _strcat_malloc(count_args(__VA_ARGS__), STR0, __VA_ARGS__)
-char* _strcat_malloc(size_t n, const char* str0, ...);
-char* _vstrcat_malloc(size_t n, const char* str0, va_list argv);
+char* _strcat_malloc(size_t n, cstr str0, ...);
+char* _vstrcat_malloc(size_t n, cstr str0, va_list argv);
 
-char* NULLABLE(sprintf_malloc)(size_t buffer_size, const char* format, ...) __attribute__((__format__(__printf__, 2, 3)));
-char* NULLABLE(vsprintf_malloc)(size_t buffer_size, const char* format, va_list argv);
+char* NULLABLE(sprintf_malloc)(size_t buffer_size, cstr format, ...) __attribute__((__format__(__printf__, 2, 3)));
+char* NULLABLE(vsprintf_malloc)(size_t buffer_size, cstr format, va_list argv);
 
-static inline bool isAlphabeticalL(char c) { return c - 'a' <= 'z'; }
-static inline bool isAlphabeticalR(char c) { return c - 'A' <= 'Z'; }
-static inline bool isDigit(char c) { return c - '0' <= '9'; }
+static inline bool isAlphabeticalLower(char c) { return 'a' <= c && c <= 'z'; } // true when c is within 'a'..'z' inclusive
+static inline bool isAlphabeticalUpper(char c) { return 'A' <= c && c <= 'Z'; } // true when c is within 'A'..'Z' inclusive
+static inline bool isDigit(char c) { return '0' <= c && c <= '9'; } // true when c is within '0'..'9' inclusive