19 changed files with 557 additions and 724 deletions
--- a/.vscode/launch.json
+++ b/.vscode/launch.json
@ -7,7 +7,7 @@
            "request": "launch",
            "program": "${workspaceFolder}/bin/tcpu",
            "windows": { "program": "${workspaceFolder}/bin/tcpu.exe" },
-            "args": [ "-c", "../examples/s.tasm", "o.bin", "--debug" ],
+            "args": [ "-c", "s.tasm", "o.bin" ],
            "cwd": "${workspaceFolder}/bin",
            "preLaunchTask": "build_exec_dbg",
            "stopAtEntry": false,
--- a/examples/s.tasm
+++ b/examples/s.tasm
@ -1,16 +0,0 @@
 /*
 "hello world" program in my assembly language
 */
 .data:
 // named array of 8-bit values
 const8 msg "Hello, World :3\0"
 .main:
 push ax 1; // sys_write
 push bx 1; // stdout
 push cx @msg; // address of msg data
 push dx #msg; // size of msg data
 sys
 push ax 0
 exit
--- a/src/VM/VM.c
+++ b/src/VM/VM.c
@ -49,7 +49,7 @@ i32 VM_boot(VM* vm){
    while (vm->current_pos < vm->data_size){
        u8 opcode = vm->data[vm->current_pos];
-        const Instruction* instr = Instruction_getByOpcode(opcode);
+        const Instruction* instr = Instruction_getFromOpcode(opcode);
        // printfe("[at 0x%x] %02X %s\n", (u32)vm->current_pos, opcode, instr->name);
        if(instr == NULL){
            VM_setError(vm, "unknown opcode %02X", opcode);
--- a/src/collections/List.h
+++ b/src/collections/List.h
@ -16,27 +16,22 @@
        return List_##T##_construct((T*)(len > 0 ? malloc(len * sizeof(T)) : NULL), 0, 0);\
    }\
    \
-    T* List_##T##_expand(List_##T* ptr);\
+    void List_##T##_push(List_##T* ptr, T value);
    void List_##T##_push(List_##T* ptr, T value);\
 #define List_define(T)\
-    T* List_##T##_expand(List_##T* ptr){\
+    void List_##T##_push(List_##T* ptr, T value){\
-        if(ptr->len == ptr->max_len){\
+        u32 max_len = ptr->max_len;\
-            u32 max_len = ptr->max_len * 1.5;\
+        if(ptr->len == max_len){\
            max_len = max_len * 1.5;\
            max_len += __List_padding_in_sizeof_T(T);\
            /* branchless version of max(max_len, __List_min_size) */\
            max_len += (max_len < __List_min_size) * (__List_min_size - max_len);\
            ptr->data = (T*)realloc(ptr->data, max_len * sizeof(T));\
            ptr->max_len = max_len;\
        }\
-        return &ptr->data[ptr->len++];\
+        ptr->data[ptr->len++] = value;\
-    }\
+    }
    \
    void List_##T##_push(List_##T* ptr, T value){\
        T* empty_cell_ptr = List_##T##_expand(ptr);\
        *empty_cell_ptr = value;\
    }\
 #define __List_min_size 16
--- a/src/compiler/AST.c
+++ b/src/compiler/AST.c
@ -1,45 +0,0 @@
 #include "AST.h"
 List_define(Argument);
 List_define(Operation);
 List_define(DataDefinition);
 static cstr _ArgumentType_str[] = {
    "Unset",
    "Register",
    "ConstValue",
    "DataName",
    "NamedDataPointer",
    "NamedDataSize",
 };
 /// Maps an argument type to its printable name.
 /// @return the matching entry of _ArgumentType_str, or "!!INDEX_ERROR!!"
 ///         when the value lies outside the table
 cstr ArgumentType_toString(ArgumentType t){
    return t < ARRAY_SIZE(_ArgumentType_str)
        ? _ArgumentType_str[t]
        : "!!INDEX_ERROR!!";
 }
 /// Prepares a section for use: stores the given name pointer and allocates
 /// the data-definition list (256 entries) and the operation list (1024 entries).
 void Section_init(Section* sec, char* name){
    sec->code = List_Operation_alloc(1024);
    sec->data = List_DataDefinition_alloc(256);
    sec->name = name;
 }
 /// Releases the buffers held by a section: both list arrays and the name.
 /// The elements stored in the lists are not visited.
 void Section_free(Section* sec){
    free(sec->code.data);
    free(sec->data.data);
    free(sec->name);
 }
 /// Allocates the section list of an AST with room for 32 sections.
 void AST_init(AST* ast){
    List_Section fresh_sections = List_Section_alloc(32);
    ast->sections = fresh_sections;
 }
 /// Frees every section stored in the AST and then the section array itself.
 void AST_free(AST* ast){
    u32 idx = 0;
    while(idx < ast->sections.len){
        Section_free(ast->sections.data + idx);
        idx++;
    }
    free(ast->sections.data);
 }
--- a/src/compiler/AST.h
+++ b/src/compiler/AST.h
@ -4,57 +4,36 @@
 #include "../collections/List.h"
 typedef enum ArgumentType {
-    ArgumentType_Unset,
+    ArgumentType_NoArgument,
    ArgumentType_Register,
    ArgumentType_ConstValue,
-    ArgumentType_DataName,
+    ArgumentType_Name,
    ArgumentType_NamedDataPointer,
    ArgumentType_NamedDataSize,
-} ArgumentType;
+};
 cstr ArgumentType_toString(ArgumentType t);
 typedef struct Argument {
    ArgumentType type;
    u32 value;
 } Argument;
 List_declare(Argument);
 typedef struct Operation {
    List_Argument args;
    Opcode op;
 } Operation;
 List_declare(Operation);
 typedef struct DataDefinition {
-    u32 element_size;
+    u8 element_size;
-    u32 count;
+    u16 size;
    cstr name;
    void* data;
-} DataDefinition;
+    cstr name;
 };
 List_declare(DataDefinition);
 typedef struct Section {
    char* name;
    List_DataDefinition data;
    List_Operation code;
 } Section;
 List_declare(Section);
 void Section_init(Section* Section, char* name);
 void Section_free(Section* Section);
 typedef struct AST {
-    List_Section sections;
+    DataDefinition
 } AST;
-void AST_init(AST* ast);
+/*
-void AST_free(AST* ast);
+d8 name ''
 goto label
 .label:
 code...
 */
--- a/src/compiler/Compiler.c
+++ b/src/compiler/Compiler.c
@ -1,149 +0,0 @@
 #include "Compiler_internal.h"
 /// Resets a compiler to its initial state: zeroes the whole struct, then
 /// allocates the token list, the line-length list and the AST.
 void Compiler_init(Compiler* cmp){
    memset(cmp, 0, sizeof(*cmp));
    AST_init(&cmp->ast);
    cmp->line_lengths = List_u32_alloc(1024);
    cmp->tokens = List_Token_alloc(4096);
    cmp->state = CompilerState_Initial;
 }
 /// Releases everything the compiler allocated: the AST, the line-length and
 /// token arrays, and the source buffer. The error message is not freed here.
 void Compiler_free(Compiler* cmp){
    AST_free(&cmp->ast);
    free(cmp->line_lengths.data);
    free(cmp->tokens.data);
    free(cmp->code);
 }
 /// Converts an index in the code buffer to a 1-based line and column using
 /// the recorded line lengths.
 /// @param pos index in code buffer
 /// @return position of the character, or {0, 0} when pos is outside the
 ///         buffer or beyond the lines recorded so far
 CodePos Compiler_getLineAndColumn(Compiler* cmp, u32 pos){
    if(cmp->code_len <= pos)
        return CodePos_create(0, 0);
    // index of the first character of the line being examined
    u32 line_start = 0;
    u32 line_number = 0;
    while(line_number < cmp->line_lengths.len){
        u32 line_end = line_start + cmp->line_lengths.data[line_number];
        line_number++;
        if(pos < line_end)
            return CodePos_create(line_number, pos + 1 - line_start);
        line_start = line_end;
    }
    return CodePos_create(0, 0);
 }
 /// Puts the compiler into the error state and stores a heap-allocated message
 /// of the form "[at LINE:COLUMN][CONTEXT] TEXT" in cmp->error_message.
 /// @param context name of the reporting function (filled in by the Compiler_setError macro)
 /// @param format printf-style format of the message text, followed by its arguments
 /// cmp->error_message is NULL only if even the fallback text could not be allocated.
 void _Compiler_setError(Compiler* cmp, cstr context, cstr format, ...){
    // An error at the end of file is reported on the last character.
    // With an empty buffer there is no last character: stay at 0 instead of
    // letting (code_len - 1) wrap around.
    if(cmp->pos >= cmp->code_len)
        cmp->pos = cmp->code_len > 0 ? cmp->code_len - 1 : 0;
    char position_str[32];
    CodePos code_pos = Compiler_getLineAndColumn(cmp, cmp->pos);
    snprintf(position_str, sizeof(position_str), "[at %u:%u][", code_pos.line, code_pos.column);
    char* real_format = strcat_malloc(position_str, context, "] ", format);
    va_list argv;
    va_start(argv, format);
    char* NULLABLE(buf) = vsprintf_malloc(512, real_format, argv);
    va_end(argv);
    free(real_format);
    if(buf == NULL){
        static const char fallback[] = "SPRINTF FAILED";
        buf = malloc(sizeof(fallback));
        // never write through a failed allocation
        if(buf != NULL)
            memcpy(buf, fallback, sizeof(fallback));
    }
    cmp->state = CompilerState_Error;
    cmp->error_message = buf;
 }
 #define setError(FORMAT, ...) {\
    Compiler_setError(cmp, FORMAT, ##__VA_ARGS__);\
 }
 /// Copies the text of a token out of the source buffer.
 /// @param t token whose begin/length select the bytes to copy
 /// @return newly allocated NUL-terminated string owned by the caller,
 ///         or NULL if the allocation failed
 char* Compiler_extractTokenStr(Compiler* cmp, Token t){
    char* s = malloc(t.length + 1);
    if(s == NULL)
        return NULL;
    // the token text starts at t.begin, not at the start of the buffer
    memcpy(s, cmp->code + t.begin, t.length);
    s[t.length] = 0;
    return s;
 }
 /// Code generation stage. Currently it only validates and advances the
 /// compiler state; nothing is written to the output file.
 /// @return false (with an error recorded) unless parsing has just finished
 static bool compileFile(Compiler* cmp, FILE* f){
    (void)f; // not used yet
    returnErrorIf_auto(cmp->state != CompilerState_Parsing);

    cmp->state = CompilerState_Compiling;
    return true;
 }
 /// Reads an assembly source file and runs the lex, parse and compile stages.
 /// @param source_file_name path of the source file to read
 /// @param out_file_name path of the output file (opened for binary writing)
 /// @param debug_log when true, prints the source, the line lengths and the tokens
 /// @return true if no errors, false if any error occured (check cmp->error_message)
 /// On success cmp->code holds the NUL-terminated source buffer.
 bool Compiler_compile(Compiler* cmp, cstr source_file_name, cstr out_file_name, bool debug_log){
    FILE* f = fopen(source_file_name, "rb");
    if(f == NULL)
        returnError("ERROR: can't open file '%s'", source_file_name);

    List_u8 buf = List_u8_alloc(64 * 1024);
    int ret;
    while((ret = fgetc(f)) != EOF) {
        List_u8_push(&buf, ret);
    }
    // query the stream before closing it, and close it exactly once
    bool read_failed = ferror(f) != 0;
    fclose(f);
    if(read_failed){
        free(buf.data);
        returnError("can't read file '%s'", source_file_name);
    }
    if(buf.len == 0){
        free(buf.data);
        returnError("source file is empty");
    }

    // Append the terminator BEFORE publishing the buffer: the push may
    // reallocate, which would leave a previously stored pointer dangling.
    u32 code_len = buf.len;
    List_u8_push(&buf, 0);

    f = fopen(out_file_name, "wb");
    if(f == NULL){
        // the compiler does not own the buffer yet, so freeing it here is safe
        free(buf.data);
        returnError("ERROR: can't open file '%s'", out_file_name);
    }

    cmp->code = (char*)buf.data;
    cmp->code_len = code_len;

    if(debug_log){
        printf("----------------------------------[%s]---------------------------------\n", source_file_name);
        fputs(cmp->code, stdout);
        fputc('\n', stdout);
    }

    bool success = Compiler_lex(cmp);

    if(debug_log){
        printf("------------------------------------[lines]-----------------------------------\n");
        for(u32 i = 0; i < cmp->line_lengths.len; i++){
            printf("[%u] length: %u\n", i+1, cmp->line_lengths.data[i]);
        }
        printf("------------------------------------[tokens]-----------------------------------\n");
        for(u32 i = 0; i < cmp->tokens.len; i++){
            Token t = cmp->tokens.data[i];
            CodePos pos = Compiler_getLineAndColumn(cmp, t.begin);
            // print straight from the source buffer; no fixed-size copy that a
            // long token could overflow
            printf("[l:%3u, c:%3u] %s '%.*s'\n",
                pos.line, pos.column,
                TokenType_toString(t.type),
                (int)t.length, cmp->code + t.begin);
        }
    }

    if(success)
        success = Compiler_parse(cmp);
    if(success)
        success = compileFile(cmp, f);
    fclose(f);

    if(success){
        cmp->state = CompilerState_Success;
    }
    return success;
 }
--- a/src/compiler/Lexer.c
+++ b/src/compiler/Lexer.c
@ -1,263 +0,0 @@
 #include "Compiler_internal.h"
 #define setError(FORMAT, ...) {\
    completeLine(cmp);\
    Compiler_setError(cmp, FORMAT, ##__VA_ARGS__);\
 }
 #define Error_unexpectedCharacter(C) "unexpected character '%c'", C
 #define Error_endOfFile "unexpected end of file"
 /// Finishes the current line: records its length (the current column)
 /// in line_lengths and restarts the column counter at 0.
 static void completeLine(Compiler* cmp){
    u32 finished_line_length = cmp->column;
    cmp->column = 0;
    List_u32_push(&cmp->line_lengths, finished_line_length);
 }
 /// Reads a "//" comment. On entry cmp->pos is on the second '/'.
 /// The token covers everything from the first '/' up to (not including) the
 /// line break or the end of the buffer; the line break is left for the caller.
 static void readCommentSingleLine(Compiler* cmp){
    Token tok = Token_construct(TokenType_SingleLineComment, cmp->pos - 1, 0);
    // step over the second '/' and then over every character of the comment
    do {
        cmp->pos++;
        cmp->column++;
    } while(cmp->pos < cmp->code_len
        && cmp->code[cmp->pos] != '\r'
        && cmp->code[cmp->pos] != '\n');
    tok.length = cmp->pos - tok.begin;
    List_Token_push(&cmp->tokens, tok);
 }
 /// Reads a multi-line comment. On entry cmp->pos is on the '*' of the opening
 /// "/*". On success the token spans from the opening '/' through the closing
 /// '/', and cmp->pos is left on that closing '/'. Reaching the end of the
 /// buffer without a closing "*/" is reported as an error.
 static void readCommentMultiLine(Compiler* cmp){
    char c; // '*'
    Token tok = Token_construct(TokenType_MultiLineComment, cmp->pos - 1, 0);
    cmp->column++;
    cmp->pos++;
    while(cmp->pos < cmp->code_len){
        c = cmp->code[cmp->pos];
        // closing comment
        // The closing '*' must come after the opening "/*", i.e. the closing
        // '/' sits at tok.begin + 3 or later. This accepts the empty comment
        // "/**/" while "/*/" still does not count as closed.
        if(cmp->pos > tok.begin + 2 && c == '/' && cmp->code[cmp->pos - 1] == '*') {
            tok.length = cmp->pos - tok.begin + 1;
            List_Token_push(&cmp->tokens, tok);
            return;
        }

        if(c == '\n')
            completeLine(cmp);
        cmp->column++;
        cmp->pos++;
    }

    // end of file
    setError(Error_endOfFile);
 }
 /// Dispatches on the character after a '/'. On entry cmp->pos is on the '/'.
 /// "//" starts a single-line comment and "/*" a multi-line comment; anything
 /// else, including a line break or the end of the buffer, is an error.
 static void readComment(Compiler* cmp){
    if(cmp->pos + 1 == cmp->code_len){
        setError(Error_endOfFile);
        return;
    }

    char c = cmp->code[cmp->pos + 1];
    if(c == '\r' || c == '\n'){
        // A lone '/' at the end of a line: report the slash itself.
        // cmp->pos has not been advanced yet, so stepping back would name the
        // character before the slash and wrap around when the slash is the
        // first byte of the buffer.
        setError(Error_unexpectedCharacter(cmp->code[cmp->pos]));
        return;
    }

    cmp->pos++;
    cmp->column++;
    if(c == '/'){
        readCommentSingleLine(cmp);
    }
    else if(c == '*'){
        readCommentMultiLine(cmp);
    }
    else {
        setError(Error_unexpectedCharacter(c));
    }
 }
 /// Reads a label. On entry cmp->pos is on the leading '.'.
 /// The label name (letters, digits, '_' and '.') runs up to a ':', a line
 /// break or the end of the buffer, and is pushed as a Label token.
 /// An empty name or any other character is reported as an error.
 static void readLabel(Compiler* cmp){
    // step over the '.'
    cmp->pos++;
    cmp->column++;
    Token tok = Token_construct(TokenType_Label, cmp->pos, 0);

    for(; cmp->pos < cmp->code_len; cmp->pos++, cmp->column++){
        char c = cmp->code[cmp->pos];
        bool ends_label = (c == ':' || c == '\r' || c == '\n');
        if(ends_label){
            tok.length = cmp->pos - tok.begin;
            if(tok.length == 0){
                // nothing after the dot: step back and report the dot
                setError(Error_unexpectedCharacter(cmp->code[--cmp->pos]));
            }
            else {
                List_Token_push(&cmp->tokens, tok);
            }
            // the line break is handled by the caller
            return;
        }

        bool name_char = isAlphabeticalLower(c) || isAlphabeticalUpper(c) || isDigit(c)
            || c == '_' || c == '.';
        if(!name_char){
            setError(Error_unexpectedCharacter(c));
            return;
        }
    }

    // the buffer ended while the name was still being read
    tok.length = cmp->pos - tok.begin;
    if(tok.length == 0){
        setError(Error_endOfFile);
    }
    else {
        List_Token_push(&cmp->tokens, tok);
    }
 }
 /// Splits the rest of an instruction line into argument tokens.
 /// Arguments are separated by spaces or tabs; the line ends at '\r', '\n',
 /// ';' or the end of the buffer. The type of each token is chosen from its
 /// first character. Inside quotes, separators and ';' do not end the token.
 static void readArguments(Compiler* cmp){
    char c; // space
    // type stays Unset until the first character of the argument is seen
    Token tok = Token_construct(TokenType_Unset, cmp->pos, 0);
    char quot = '\0'; // quotation character of a string value
    while(cmp->pos < cmp->code_len){
        c = cmp->code[cmp->pos];

        // string argument reading
        if(quot != '\0'){
            // a matching quote closes the literal unless the previous
            // character is a backslash
            if(c == quot && cmp->code[cmp->pos - 1] != '\\'){
                quot = '\0';
            }
            // a quoted literal may not span lines
            else if(c == '\r' || c == '\n'){
                setError("line end reached but string hasn't been closed yet");
                return;
            }
        }

        // end of line
        else if(c == '\r' || c == '\n' || c == ';'){
            tok.length = cmp->pos - tok.begin;
            // empty tokens (e.g. after trailing blanks) are not pushed
            if(tok.length > 0)
                List_Token_push(&cmp->tokens, tok);
            // cmp->line will be increased in lex()
            return;
        }
        
        // new argument begins
        else if(c == ' ' || c == '\t'){
            tok.length = cmp->pos - tok.begin;
            if(tok.length > 0)
                List_Token_push(&cmp->tokens, tok);
            // the next token starts right after this blank
            tok = Token_construct(TokenType_Unset, cmp->pos + 1, 0);
        }

        // first character of an argument decides its token type
        else if(tok.type == TokenType_Unset){
            if(c == '\''){
                tok.type = TokenType_Char;
                quot = c;
            }
            else if(c == '"'){
                tok.type = TokenType_String;
                quot = c;
            }
            else if(c == '@')
                tok.type = TokenType_NamedDataPointer;
            else if(c == '#')
                tok.type = TokenType_NamedDataSize;
            else if(isDigit(c))
                tok.type = TokenType_Number;
            else tok.type = TokenType_Name;
        }

        cmp->column++;
        cmp->pos++;
    }

    // end of file
    tok.length = cmp->pos - tok.begin;
    if(tok.length > 0)
        List_Token_push(&cmp->tokens, tok);
 }
 /// Reads an instruction name made of letters and digits, starting at cmp->pos.
 /// The name ends at a line break, ';', the end of the buffer, or at a blank;
 /// after a blank the rest of the line is handed to readArguments().
 /// Any other character is reported as an error and no token is pushed.
 static void readInstruction(Compiler* cmp){
    Token tok = Token_construct(TokenType_Instruction, cmp->pos, 0);
    bool arguments_follow = false;

    // the first character is already known to belong to the name
    cmp->pos++;
    cmp->column++;
    for(; cmp->pos < cmp->code_len; cmp->pos++, cmp->column++){
        char c = cmp->code[cmp->pos];
        // end of line; the line break is handled by the caller
        if(c == '\r' || c == '\n' || c == ';')
            break;
        if(c == ' ' || c == '\t'){
            arguments_follow = true;
            break;
        }
        if(!(isAlphabeticalLower(c) || isAlphabeticalUpper(c) || isDigit(c))){
            setError(Error_unexpectedCharacter(c));
            return;
        }
    }

    tok.length = cmp->pos - tok.begin;
    List_Token_push(&cmp->tokens, tok);
    if(arguments_follow)
        readArguments(cmp);
 }
 /// Splits the source buffer into tokens and records the length of every line.
 /// Must be called in the Initial state; moves the compiler to the Lexing state.
 /// @return true on success, false if an error was recorded
 bool Compiler_lex(Compiler* cmp){
    returnErrorIf_auto(cmp->state != CompilerState_Initial);
    cmp->state = CompilerState_Lexing;
    cmp->column = 1;

    while(cmp->pos < cmp->code_len){
        char c = cmp->code[cmp->pos];
        switch(c){
            // skip blank characters
            case ' ': case '\t': case '\r': case '\n':
                break;
            // try read comment
            case '/':
                readComment(cmp);
                break;
            // try read label
            case '.':
                readLabel(cmp);
                break;
            default:
                // try read instruction
                if(isAlphabeticalLower(c) || isAlphabeticalUpper(c))
                    readInstruction(cmp);
                else returnError(Error_unexpectedCharacter(c));
                break;
        }

        if(cmp->state == CompilerState_Error)
            return false;

        // A reader may have consumed everything up to the end of the buffer.
        // Only look at the current character while it is still inside the
        // buffer instead of relying on a terminator byte behind it.
        if(cmp->pos < cmp->code_len && cmp->code[cmp->pos] == '\n')
            completeLine(cmp);
        cmp->column++;
        cmp->pos++;
    }

    // the last line has no trailing '\n' to close it
    completeLine(cmp);
    return true;
 }
--- a/src/compiler/compiler.c
+++ b/src/compiler/compiler.c
@ -0,0 +1,470 @@
 #include "Lexer.h"
 List_define(Token);
 /// Converts an index in the code buffer to a 1-based line and column using
 /// the recorded line lengths.
 /// @param pos index in code buffer
 /// @return position of the character, or {0, 0} when pos is outside the
 ///         buffer or beyond the lines recorded so far
 CodePos Lexer_getLineAndColumn(Lexer* cmp, u32 pos){
    if(cmp->code_len <= pos)
        return CodePos_create(0, 0);
    // index of the first character of the line being examined
    u32 line_start = 0;
    u32 line_number = 0;
    while(line_number < cmp->line_lengths.len){
        u32 line_end = line_start + cmp->line_lengths.data[line_number];
        line_number++;
        if(pos < line_end)
            return CodePos_create(line_number, pos + 1 - line_start);
        line_start = line_end;
    }
    return CodePos_create(0, 0);
 }
 /// Finishes the current line: records its length (the current column)
 /// in line_lengths and restarts the column counter at 0.
 static void completeLine(Lexer* cmp){
    u32 finished_line_length = cmp->column;
    cmp->column = 0;
    List_u32_push(&cmp->line_lengths, finished_line_length);
 }
 /// Puts the lexer into the error state and stores a heap-allocated message
 /// of the form "[at LINE:COLUMN][CONTEXT] TEXT" in cmp->error_message.
 /// The current line is completed first so that its length is recorded
 /// before the position is looked up.
 /// @param context name of the reporting function (filled in by the Lexer_setError macro)
 /// @param format printf-style format of the message text, followed by its arguments
 /// cmp->error_message is NULL only if even the fallback text could not be allocated.
 void _Lexer_setError(Lexer* cmp, cstr context, cstr format, ...){
    completeLine(cmp);
    // An error at the end of file is reported on the last character.
    // With an empty buffer there is no last character: stay at 0 instead of
    // letting (code_len - 1) wrap around.
    if(cmp->pos >= cmp->code_len)
        cmp->pos = cmp->code_len > 0 ? cmp->code_len - 1 : 0;
    char position_str[32];
    CodePos code_pos = Lexer_getLineAndColumn(cmp, cmp->pos);
    snprintf(position_str, sizeof(position_str), "[at %u:%u][", code_pos.line, code_pos.column);
    char* real_format = strcat_malloc(position_str, context, "] ", format);
    va_list argv;
    va_start(argv, format);
    char* NULLABLE(buf) = vsprintf_malloc(512, real_format, argv);
    va_end(argv);
    free(real_format);
    if(buf == NULL){
        static const char fallback[] = "SPRINTF FAILED";
        buf = malloc(sizeof(fallback));
        // never write through a failed allocation
        if(buf != NULL)
            memcpy(buf, fallback, sizeof(fallback));
    }
    cmp->state = LexerState_Error;
    cmp->error_message = buf;
 }
 #define setError(FORMAT, ...) Lexer_setError(cmp, FORMAT, ##__VA_ARGS__)
 #define returnError(FORMAT, ...) {\
    setError(FORMAT, ##__VA_ARGS__);\
    return false;\
 }
 #define returnErrorIf(STATEMENT, FORMAT, ...) if(STATEMENT) returnError(FORMAT, ##__VA_ARGS__)
 #define returnErrorIf_auto(STATEMENT) returnErrorIf(STATEMENT, #STATEMENT)
 #define Error_unexpectedCharacter(C) "unexpected character '%c'", C
 #define Error_endOfFile "unexpected end of file"
 /// Resets a lexer to its initial state: zeroes the whole struct, then
 /// allocates the token list and the line-length list.
 void Lexer_init(Lexer* cmp){
    memset(cmp, 0, sizeof(*cmp));
    cmp->line_lengths = List_u32_alloc(1024);
    cmp->tokens = List_Token_alloc(4096);
    cmp->state = LexerState_Initial;
 }
 /// Releases the line-length array, the token array and the source buffer.
 /// The error message is not freed here.
 void Lexer_free(Lexer* cmp){
    free(cmp->line_lengths.data);
    free(cmp->tokens.data);
    free(cmp->code);
 }
 /// Reads a "//" comment. On entry cmp->pos is on the second '/'.
 /// The token covers everything from the first '/' up to (not including) the
 /// line break or the end of the buffer; the line break is left for the caller.
 static void readCommentSingleLine(Lexer* cmp){
    Token tok = Token_construct(TokenType_SingleLineComment, cmp->pos - 1, 0);
    // step over the second '/' and then over every character of the comment
    do {
        cmp->pos++;
        cmp->column++;
    } while(cmp->pos < cmp->code_len
        && cmp->code[cmp->pos] != '\r'
        && cmp->code[cmp->pos] != '\n');
    tok.length = cmp->pos - tok.begin;
    List_Token_push(&cmp->tokens, tok);
 }
 /// Reads a multi-line comment. On entry cmp->pos is on the '*' of the opening
 /// "/*". On success the token spans from the opening '/' through the closing
 /// '/', and cmp->pos is left on that closing '/'. Reaching the end of the
 /// buffer without a closing "*/" is reported as an error.
 static void readCommentMultiLine(Lexer* cmp){
    char c; // '*'
    Token tok = Token_construct(TokenType_MultiLineComment, cmp->pos - 1, 0);
    cmp->column++;
    cmp->pos++;
    while(cmp->pos < cmp->code_len){
        c = cmp->code[cmp->pos];
        // closing comment
        // The closing '*' must come after the opening "/*", i.e. the closing
        // '/' sits at tok.begin + 3 or later. This accepts the empty comment
        // "/**/" while "/*/" still does not count as closed.
        if(cmp->pos > tok.begin + 2 && c == '/' && cmp->code[cmp->pos - 1] == '*') {
            tok.length = cmp->pos - tok.begin + 1;
            List_Token_push(&cmp->tokens, tok);
            return;
        }

        if(c == '\n')
            completeLine(cmp);
        cmp->column++;
        cmp->pos++;
    }

    // end of file
    setError(Error_endOfFile);
 }
 /// Dispatches on the character after a '/'. On entry cmp->pos is on the '/'.
 /// "//" starts a single-line comment and "/*" a multi-line comment; anything
 /// else, including a line break or the end of the buffer, is an error.
 static void readComment(Lexer* cmp){
    if(cmp->pos + 1 == cmp->code_len){
        setError(Error_endOfFile);
        return;
    }

    char c = cmp->code[cmp->pos + 1];
    if(c == '\r' || c == '\n'){
        // A lone '/' at the end of a line: report the slash itself.
        // cmp->pos has not been advanced yet, so stepping back would name the
        // character before the slash and wrap around when the slash is the
        // first byte of the buffer.
        setError(Error_unexpectedCharacter(cmp->code[cmp->pos]));
        return;
    }

    cmp->pos++;
    cmp->column++;
    if(c == '/'){
        readCommentSingleLine(cmp);
    }
    else if(c == '*'){
        readCommentMultiLine(cmp);
    }
    else {
        setError(Error_unexpectedCharacter(c));
    }
 }
 /// Reads a label. On entry cmp->pos is on the leading '.'.
 /// The label name (letters, digits, '_' and '.') runs up to a ':', a line
 /// break or the end of the buffer, and is pushed as a Label token.
 /// An empty name or any other character is reported as an error.
 static void readLabel(Lexer* cmp){
    // step over the '.'
    cmp->pos++;
    cmp->column++;
    Token tok = Token_construct(TokenType_Label, cmp->pos, 0);

    for(; cmp->pos < cmp->code_len; cmp->pos++, cmp->column++){
        char c = cmp->code[cmp->pos];
        bool ends_label = (c == ':' || c == '\r' || c == '\n');
        if(ends_label){
            tok.length = cmp->pos - tok.begin;
            if(tok.length == 0){
                // nothing after the dot: step back and report the dot
                setError(Error_unexpectedCharacter(cmp->code[--cmp->pos]));
            }
            else {
                List_Token_push(&cmp->tokens, tok);
            }
            // the line break is handled by the caller
            return;
        }

        bool name_char = isAlphabeticalLower(c) || isAlphabeticalUpper(c) || isDigit(c)
            || c == '_' || c == '.';
        if(!name_char){
            setError(Error_unexpectedCharacter(c));
            return;
        }
    }

    // the buffer ended while the name was still being read
    tok.length = cmp->pos - tok.begin;
    if(tok.length == 0){
        setError(Error_endOfFile);
    }
    else {
        List_Token_push(&cmp->tokens, tok);
    }
 }
 /// Splits the rest of an instruction line into Argument tokens.
 /// On entry cmp->pos is on the blank that follows the instruction name.
 /// Arguments are separated by spaces or tabs; the line ends at '\r', '\n',
 /// ';' or the end of the buffer. Empty tokens are never pushed.
 static void readArguments(Lexer* cmp){
    // step over the blank after the instruction name
    cmp->pos++;
    cmp->column++;
    Token tok = Token_construct(TokenType_Argument, cmp->pos, 0);

    for(; cmp->pos < cmp->code_len; cmp->pos++, cmp->column++){
        char c = cmp->code[cmp->pos];
        // end of line; the line break is handled by the caller
        if(c == '\r' || c == '\n' || c == ';')
            break;
        if(c != ' ' && c != '\t')
            continue;

        // a blank closes the current argument and the next one starts after it
        tok.length = cmp->pos - tok.begin;
        if(tok.length > 0)
            List_Token_push(&cmp->tokens, tok);
        tok.begin = cmp->pos + 1;
    }

    // last argument of the line (or of the file)
    tok.length = cmp->pos - tok.begin;
    if(tok.length > 0)
        List_Token_push(&cmp->tokens, tok);
 }
 /// Reads an instruction name made of letters and digits, starting at cmp->pos.
 /// The name ends at a line break, ';', the end of the buffer, or at a blank;
 /// after a blank the rest of the line is handed to readArguments().
 /// Any other character is reported as an error and no token is pushed.
 static void readInstruction(Lexer* cmp){
    Token tok = Token_construct(TokenType_Instruction, cmp->pos, 0);
    bool arguments_follow = false;

    // the first character is already known to belong to the name
    cmp->pos++;
    cmp->column++;
    for(; cmp->pos < cmp->code_len; cmp->pos++, cmp->column++){
        char c = cmp->code[cmp->pos];
        // end of line; the line break is handled by the caller
        if(c == '\r' || c == '\n' || c == ';')
            break;
        if(c == ' ' || c == '\t'){
            arguments_follow = true;
            break;
        }
        if(!(isAlphabeticalLower(c) || isAlphabeticalUpper(c) || isDigit(c))){
            setError(Error_unexpectedCharacter(c));
            return;
        }
    }

    tok.length = cmp->pos - tok.begin;
    List_Token_push(&cmp->tokens, tok);
    if(arguments_follow)
        readArguments(cmp);
 }
 /// Reads a character literal. On entry cmp->pos is on the opening '\''.
 /// The token spans both quotes, and cmp->pos is left on the closing quote.
 /// A backslash escapes the character after it, so an escaped quote does not
 /// end the literal. A line break or the end of the buffer before the closing
 /// quote is reported as an error.
 static void readChar(Lexer* cmp){
    Token tok = Token_construct(TokenType_Char, cmp->pos, 0);
    // true when the previous character was an unescaped backslash
    bool escaped = false;
    cmp->pos++;
    cmp->column++;

    while(cmp->pos < cmp->code_len){
        char c = cmp->code[cmp->pos];
        // end of line
        if(c == '\r' || c == '\n'){
            setError(Error_unexpectedCharacter(cmp->code[--cmp->pos]));
            return;
        }

        if(escaped){
            // this character is taken literally, whatever it is
            escaped = false;
        }
        else if(c == '\\'){
            escaped = true;
        }
        else if(c == '\''){
            tok.length = cmp->pos - tok.begin + 1;
            List_Token_push(&cmp->tokens, tok);
            return;
        }

        cmp->column++;
        cmp->pos++;
    }

    // end of file
    setError(Error_endOfFile);
 }
 /// Reads a string literal. On entry cmp->pos is on the opening '"'.
 /// The token spans both quotes, and cmp->pos is left on the closing quote.
 /// A backslash escapes the character after it, so an escaped quote does not
 /// end the literal. A line break or the end of the buffer before the closing
 /// quote is reported as an error.
 static void readString(Lexer* cmp){
    Token tok = Token_construct(TokenType_String, cmp->pos, 0);
    // true when the previous character was an unescaped backslash
    bool escaped = false;
    cmp->pos++;
    cmp->column++;

    while(cmp->pos < cmp->code_len){
        char c = cmp->code[cmp->pos];
        // end of line
        if(c == '\r' || c == '\n'){
            setError(Error_unexpectedCharacter(cmp->code[--cmp->pos]));
            return;
        }

        if(escaped){
            // this character is taken literally, whatever it is
            escaped = false;
        }
        else if(c == '\\'){
            escaped = true;
        }
        else if(c == '"'){
            tok.length = cmp->pos - tok.begin + 1;
            List_Token_push(&cmp->tokens, tok);
            return;
        }

        cmp->column++;
        cmp->pos++;
    }

    // end of file
    setError(Error_endOfFile);
 }
 /// Reads a number token starting at cmp->pos. The token runs up to
 /// (not including) the next blank, line break, ',' or ';', or to the end of
 /// the buffer. The characters in between are not validated here.
 static void readNumber(Lexer* cmp){
    Token tok = Token_construct(TokenType_Number, cmp->pos, 0);

    for(;;){
        cmp->pos++;
        cmp->column++;
        if(cmp->pos >= cmp->code_len)
            break;
        char c = cmp->code[cmp->pos];
        bool delimiter = (c == '\r' || c == '\n' || c == ' '
            || c == '\t' || c == ',' || c == ';');
        if(delimiter)
            break;
    }

    tok.length = cmp->pos - tok.begin;
    List_Token_push(&cmp->tokens, tok);
 }
 /// Splits the source buffer into tokens and records the length of every line.
 /// Must be called in the Initial state; moves the lexer to the Lexing state.
 /// @return true on success, false if an error was recorded
 static bool lex(Lexer* cmp){
    returnErrorIf_auto(cmp->state != LexerState_Initial);
    cmp->state = LexerState_Lexing;
    cmp->column = 1;

    while(cmp->pos < cmp->code_len){
        char c = cmp->code[cmp->pos];
        switch(c){
            // skip blank characters
            case ' ': case '\t': case '\r': case '\n':
                break;
            // try read comment
            case '/':
                readComment(cmp);
                break;
            // try read label
            case '.':
                readLabel(cmp);
                break;
            case '"':
                readString(cmp);
                break;
            case '\'':
                readChar(cmp);
                break;
            default:
                // try read instruction
                if(isAlphabeticalLower(c) || isAlphabeticalUpper(c))
                    readInstruction(cmp);
                else if(isDigit(c))
                    readNumber(cmp);
                else returnError(Error_unexpectedCharacter(c));
                break;
        }

        if(cmp->state == LexerState_Error)
            return false;

        // A reader may have consumed everything up to the end of the buffer.
        // Only look at the current character while it is still inside the
        // buffer instead of relying on a terminator byte behind it.
        if(cmp->pos < cmp->code_len && cmp->code[cmp->pos] == '\n')
            completeLine(cmp);
        cmp->column++;
        cmp->pos++;
    }

    // the last line has no trailing '\n' to close it
    completeLine(cmp);
    return true;
 }
 /// Parsing stage. Currently it only validates and advances the state.
 /// @return false (with an error recorded) unless lexing has just finished
 static bool parse(Lexer* cmp){
    // the macro records the failed condition as the error text and returns false
    returnErrorIf_auto(cmp->state != LexerState_Lexing);

    cmp->state = LexerState_Parsing;
    return true;
 }
 /// Code generation stage. Currently it only validates and advances the
 /// state; nothing is written to the output file.
 /// @return false (with an error recorded) unless parsing has just finished
 static bool compile(Lexer* cmp, FILE* f){
    (void)f; // not used yet
    returnErrorIf_auto(cmp->state != LexerState_Parsing);

    cmp->state = LexerState_Compiling;
    return true;
 }
 /// Reads an assembly source file and runs the lex, parse and compile stages.
 /// @param source_file_name path of the source file to read
 /// @param out_file_name path of the output file (opened for binary writing)
 /// @param debug when true, prints the source, the line lengths and the tokens
 /// @return true if no errors, false if any error occured (check cmp->error_message)
 /// On success cmp->code holds the NUL-terminated source buffer.
 bool Lexer_compileTasm(Lexer* cmp, cstr source_file_name, cstr out_file_name, bool debug){
    FILE* f = fopen(source_file_name, "rb");
    if(f == NULL)
        returnError("ERROR: can't open file '%s'", source_file_name);

    List_u8 buf = List_u8_alloc(64 * 1024);
    int ret;
    while((ret = fgetc(f)) != EOF) {
        List_u8_push(&buf, ret);
    }
    // query the stream before closing it, and close it exactly once
    bool read_failed = ferror(f) != 0;
    fclose(f);
    if(read_failed){
        free(buf.data);
        returnError("can't read file '%s'", source_file_name);
    }
    if(buf.len == 0){
        free(buf.data);
        returnError("source file is empty");
    }

    // Append the terminator BEFORE publishing the buffer: the push may
    // reallocate, which would leave a previously stored pointer dangling.
    u32 code_len = buf.len;
    List_u8_push(&buf, 0);

    f = fopen(out_file_name, "wb");
    if(f == NULL){
        // the lexer does not own the buffer yet, so freeing it here is safe
        free(buf.data);
        returnError("ERROR: can't open file '%s'", out_file_name);
    }

    cmp->code = (char*)buf.data;
    cmp->code_len = code_len;

    if(debug){
        printf("----------------------------------[%s]---------------------------------\n", source_file_name);
        fputs(cmp->code, stdout);
        fputc('\n', stdout);
    }

    bool success = lex(cmp);

    if(debug){
        printf("------------------------------------[lines]-----------------------------------\n");
        for(u32 i = 0; i < cmp->line_lengths.len; i++){
            printf("[%u] length: %u\n", i+1, cmp->line_lengths.data[i]);
        }
        printf("------------------------------------[tokens]-----------------------------------\n");
        for(u32 i = 0; i < cmp->tokens.len; i++){
            Token t = cmp->tokens.data[i];
            CodePos pos = Lexer_getLineAndColumn(cmp, t.begin);
            // print straight from the source buffer; no fixed-size copy that a
            // long token could overflow
            printf("[l:%3u, c:%3u] %s '%.*s'\n",
                pos.line, pos.column,
                TokenType_toString(t.type),
                (int)t.length, cmp->code + t.begin);
        }
    }

    if(success)
        success = parse(cmp);
    if(success)
        success = compile(cmp, f);
    fclose(f);

    if(success){
        cmp->state = LexerState_Success;
    }
    return success;
 }
--- a/src/compiler/compiler.h
+++ b/src/compiler/compiler.h
@ -1,34 +1,48 @@
 #pragma once
 #include "../std.h"
 #include "../collections/List.h"
-#include "Token.h"
+#include "token.h"
 #include "AST.h"
-typedef enum CompilerState {
+List_declare(Token);
    CompilerState_Initial,
    CompilerState_Lexing,
    CompilerState_Parsing,
    CompilerState_Compiling,
    CompilerState_Error,
    CompilerState_Success
 } CompilerState;
-typedef struct Compiler {
+typedef enum LexerState {
    LexerState_Initial,
    LexerState_Lexing,
    LexerState_Parsing,
    LexerState_Compiling,
    LexerState_Error,
    LexerState_Success
 } LexerState;
 typedef struct Lexer {
    char* code;
    u32 code_len;
    u32 column;     // > 0 if code parsing started
    u32 pos;
-    CompilerState state;
+    LexerState state;
    NULLABLE(char* error_message);
    List_Token tokens;
    List_u32 line_lengths;
-    AST ast;
+} Lexer;
    u32 tok_i;
 } Compiler;
-void Compiler_init(Compiler* cmp);
+void Lexer_init(Lexer* cmp);
-void Compiler_free(Compiler* cmp);
+void Lexer_free(Lexer* cmp);
 /// @brief compile assembly language code to machine code
 /// @return true if no errors, false if any error occured (check cmp->error_message)
-bool Compiler_compile(Compiler* cmp, cstr source_file_name, cstr out_file_name, bool debug);
+bool Lexer_compileTasm(Lexer* cmp, cstr source_file_name, cstr out_file_name, bool debug);
 #define Lexer_setError(cmp, format, ...) _Lexer_setError(cmp, __func__, format ,##__VA_ARGS__)
 void _Lexer_setError(Lexer* cmp, cstr context, cstr format, ...) __attribute__((__format__(__printf__, 3, 4)));
 typedef struct CodePos {
    u32 line; // 0 on error
    u32 column; // 0 on error
 } CodePos;
 #define CodePos_create(L, C) ((CodePos){ .line = L, .column = C })
 /// @param pos index in code buffer
 CodePos Lexer_getLineAndColumn(Lexer* cmp, u32 pos);
--- a/src/compiler/compiler_internal.h
+++ b/src/compiler/compiler_internal.h
@ -1,29 +0,0 @@
 #include "Compiler.h"
 void _Compiler_setError(Compiler* cmp, cstr context, cstr format, ...) __attribute__((__format__(__printf__, 3, 4)));
 #define Compiler_setError(cmp, format, ...) _Compiler_setError(cmp, __func__, format ,##__VA_ARGS__)
 #define returnError(FORMAT, ...) {\
    setError(FORMAT, ##__VA_ARGS__);\
    return false;\
 }
 #define returnErrorIf(STATEMENT, FORMAT, ...) if(STATEMENT) returnError(FORMAT, ##__VA_ARGS__)
 #define returnErrorIf_auto(STATEMENT) returnErrorIf(STATEMENT, #STATEMENT)
 typedef struct CodePos {
    u32 line; // 0 on error
    u32 column; // 0 on error
 } CodePos;
 #define CodePos_create(L, C) ((CodePos){ .line = L, .column = C })
 /// @param pos index in code buffer
 CodePos Compiler_getLineAndColumn(Compiler* cmp, u32 pos);
 char* Compiler_extractTokenStr(Compiler* cmp, Token t);
 bool Compiler_lex(Compiler* cmp);
 bool Compiler_parse(Compiler* cmp);
--- a/src/compiler/parser.c
+++ b/src/compiler/parser.c
@ -1,68 +1,3 @@
-#include "Compiler_internal.h"
+#include "compiler.h"
-#define setError(FORMAT, ...) {\
+List_define(DataDefinition);
    cmp->pos = cmp->tokens.data[cmp->tok_i].begin;\
    Compiler_setError(cmp, FORMAT, ##__VA_ARGS__);\
 }
 /// @brief report an "unexpected character" error that quotes the text of token T
 /// Requires a variable named `cmp` at the expansion site.
 /// The string returned by Compiler_extractTokenStr is freed right after it has been formatted into the message.
 #define setError_unexpectedToken(T) {\
    char* tok_str = Compiler_extractTokenStr(cmp, T);\
    setError("unexpected character '%s'", tok_str);\
    free(tok_str);\
 }
 /// @brief placeholder for data definition parsing: the body is empty, no parameter is read or written
 /// NOTE(review): presumably meant to fill *dataDefPtr from the tokens that follow instr_name - not implemented here
 static void parseDataDefinition(Compiler* cmp, char* instr_name, DataDefinition* dataDefPtr){
 }
 /// @brief placeholder for operation parsing: the body is empty, no parameter is read or written
 /// NOTE(review): presumably meant to fill *operPtr from the tokens that follow instr_name - not implemented here
 static void parseOperation(Compiler* cmp, char* instr_name, Operation* operPtr){
 }
 /// @brief turn the token list into sections holding data definitions and operations
 /// @details Must be called while cmp->state is CompilerState_Lexing; switches the state to CompilerState_Parsing.
 ///     Tokens are read starting at cmp->tok_i. Comments are skipped, a label opens a new section,
 ///     an instruction is appended to the current section: as a data definition when its name
 ///     begins with "const", as an operation otherwise.
 /// @return true if every token was processed, false after an error was reported through setError
 bool Compiler_parse(Compiler* cmp){
    returnErrorIf_auto(cmp->state != CompilerState_Lexing);
    cmp->state = CompilerState_Parsing;
    Section* sec = NULL;
    while(cmp->tok_i < cmp->tokens.len){
        Token tok = cmp->tokens.data[cmp->tok_i];
        switch(tok.type){
            case TokenType_Unset:
                returnError("token of undefined type");
            case TokenType_SingleLineComment:
            case TokenType_MultiLineComment:
                // skip comments
                break;
            case TokenType_Label:
                // create new section
                sec = List_Section_expand(&cmp->ast.sections);
                Section_init(sec, Compiler_extractTokenStr(cmp, tok));
                break;
            case TokenType_Instruction: {
                // braces: a declaration may not directly follow a case label before C23
                // instructions are stored inside a section, so a label must have opened one already
                if(sec == NULL)
                    returnError("instruction outside of any section");
                // NOTE(review): instr_name is heap-allocated; who frees it after parse* is not visible here
                char* instr_name = Compiler_extractTokenStr(cmp, tok);
                // data definition starts with const:
                // cstr_seek returns the position of the first match, 0 means the name begins with "const"
                // (the previous truthiness test was also true for the -1 "not found" result)
                if(cstr_seek(instr_name, "const", 0, (u32)-1) == 0){
                    DataDefinition* dataDefPtr = List_DataDefinition_expand(&sec->data);
                    parseDataDefinition(cmp, instr_name, dataDefPtr);
                }
                else {
                    Operation* operPtr = List_Operation_expand(&sec->code);
                    parseOperation(cmp, instr_name, operPtr);
                }
                break;
            }
            default:
                setError_unexpectedToken(tok);
                return false;
        }
        // parse* helpers report failures through the compiler state
        if(cmp->state == CompilerState_Error)
            return false;
        cmp->tok_i++;
    }
    return true;
 }
--- a/src/compiler/token.c
+++ b/src/compiler/token.c
@ -1,19 +1,19 @@
-#include "Token.h"
+#include "token.h"
 List_define(Token);
 static cstr _TokenType_str[] = {
    "Unset",
    "SingleLineComment",
    "MultiLineComment",
    "Instruction",
    "Label",
    "DataDefinition",
    "Number",
    "Char",
    "String",
-    "Name",
+    "Instruction",
-    "NamedDataPointer",
+    "Register",
-    "NamedDataSize"
+    "DataType",
    "DataPointer",
    "DataSize"
 };
 cstr TokenType_toString(TokenType t){
--- a/src/compiler/token.h
+++ b/src/compiler/token.h
@ -1,19 +1,20 @@
 #pragma once
 #include "../std.h"
 #include "../collections/List.h"
 typedef enum TokenType {
-    TokenType_Unset,                // initial value
+    TokenType_Unset,
-    TokenType_SingleLineComment,    // //comment
+    TokenType_SingleLineComment,
-    TokenType_MultiLineComment,     // /* comment */
+    TokenType_MultiLineComment,
-    TokenType_Instruction,          // abc
+    TokenType_Label,
-    TokenType_Label,                // .abc:
+    TOkenType_DataDefinition,
-    TokenType_Number,               // 0123
+    TokenType_Number,
-    TokenType_Char,                 // 'A'
+    TokenType_Char,
-    TokenType_String,               // "aaaa"
+    TokenType_String,
-    TokenType_Name,                 // xyz
+    TokenType_Instruction,
-    TokenType_NamedDataPointer,     // @xyz
+    TokenType_Register,
-    TokenType_NamedDataSize         // #xyz
+    TokenType_DataType,
    TokenType_DataPointer,
    TokenType_DataSize
 } TokenType;
 cstr TokenType_toString(TokenType t);
@ -24,6 +25,4 @@ typedef struct Token {
    TokenType type : 8; // type of token (8 bits)
 } Token;
 List_declare(Token);
 /// @brief build a Token value from a type and two position arguments
 /// NOTE(review): the third argument is named END but is stored in .length - confirm whether callers pass an end index or a length
 #define Token_construct(TYPE, BEGIN, END) ((Token){ .type = TYPE, .begin = BEGIN, .length = END })
--- a/src/cstr.c
+++ b/src/cstr.c
@ -50,43 +50,3 @@ char* NULLABLE(vsprintf_malloc)(size_t buffer_size, cstr format, va_list argv){
    }
    return buf;
 }
 /// @brief find the first occurrence of <fragment> in <src>, scanning forward
 /// @param src string to search in (NULL gives -1)
 /// @param fragment string to search for (NULL or empty gives -1)
 /// @param startIndex index in <src> where the search window begins; an index at or past the terminator gives -1
 /// @param seekLength size of the search window in characters, (u32)-1 means "up to the end of <src>";
 ///     a match has to lie completely inside the window
 /// @return index of the first character of the match, or -1 if not found
 i32 cstr_seek(const char* src, const char* fragment, u32 startIndex, u32 seekLength){
    if(src == NULL || fragment == NULL || *fragment == 0)
        return -1;
    size_t src_len = strlen(src);
    size_t fr_len = strlen(fragment);
    // never read past the terminator of src
    if(startIndex >= src_len)
        return -1;
    // clamp the window to the characters that really exist
    size_t window = src_len - startIndex;
    if(seekLength < window)
        window = seekLength;
    if(fr_len > window)
        return -1;
    // every candidate is compared from the first character of fragment,
    // so a partial match followed by a mismatch can not produce a false positive
    size_t last_start = startIndex + (window - fr_len);
    for(size_t si = startIndex; si <= last_start; si++){
        if(memcmp(src + si, fragment, fr_len) == 0)
            return (i32)si;
    }
    return -1;
 }
 /// @brief find the last occurrence of <fragment> in <src>, scanning backward
 /// @param src string to search in (NULL or empty gives -1)
 /// @param fragment string to search for (NULL or empty gives -1)
 /// @param startIndex index of the last character of the search window;
 ///     (u32)-1 or any index past the end of <src> means the last character of <src>
 /// @param seekLength size of the search window in characters, counted backward from startIndex,
 ///     (u32)-1 means "down to the beginning of <src>"; a match has to lie completely inside the window
 /// @return index of the first character of the match, or -1 if not found
 i32 cstr_seekReverse(const char* src, const char* fragment, u32 startIndex, u32 seekLength){
    if(src == NULL || fragment == NULL || *src == 0 || *fragment == 0)
        return -1;
    size_t src_len = strlen(src);
    size_t fr_len = strlen(fragment);
    // index of the last character that belongs to the window
    size_t last = src_len - 1;
    if(startIndex != (u32)-1 && startIndex < src_len)
        last = startIndex;
    // clamp the window to the characters that really exist
    size_t window = last + 1;
    if(seekLength < window)
        window = seekLength;
    if(fr_len > window)
        return -1;
    size_t first = last + 1 - window;
    // try candidate start positions from the highest to the lowest;
    // the loop is left explicitly at `first` so the unsigned index never wraps below 0
    for(size_t si = last + 1 - fr_len; ; si--){
        if(memcmp(src + si, fragment, fr_len) == 0)
            return (i32)si;
        if(si == first)
            break;
    }
    return -1;
 }
--- a/src/instructions/instructions.c
+++ b/src/instructions/instructions.c
@ -29,7 +29,7 @@ const Instruction instructions[] = {
    // Instruction_construct(CALL),
 };
-const Instruction* Instruction_getByOpcode(Opcode opcode){
+const Instruction* Instruction_getFromOpcode(Opcode opcode){
    if(opcode >= ARRAY_SIZE(instructions))
        return NULL;
--- a/src/instructions/instructions.h
+++ b/src/instructions/instructions.h
@ -33,4 +33,4 @@ typedef struct Instruction {
 /// @brief get instruction info from table
 /// @param opcode any byte
 /// @return ptr to struct or NULL
-const Instruction* NULLABLE(Instruction_getByOpcode)(Opcode opcode);
+const Instruction* NULLABLE(Instruction_getFromOpcode)(Opcode opcode);
--- a/src/main.c
+++ b/src/main.c
@ -1,11 +1,11 @@
 #include "VM/VM.h"
 #include "instructions/instructions.h"
 #include "collections/List.h"
-#include "compiler/Compiler.h"
+#include "compiler/compiler.h"
 #define arg_is(STR) (strcmp(argv[argi], STR) == 0)
-i32 compileSources(cstr source_file, cstr out_file, bool debug_log);
+i32 compileSources(cstr source_file, cstr out_file);
 i32 bootFromImage(cstr image_file);
 i32 main(const i32 argc, cstr* argv){
@ -21,8 +21,6 @@ i32 main(const i32 argc, cstr* argv){
    cstr NULLABLE(out_file) = NULL;
    cstr NULLABLE(source_file) = NULL;
    bool debug_log = false;
    for(i32 argi = 1; argi < argc; argi++){
        if(arg_is("-h") || arg_is("--help")){
            printf(
@ -30,13 +28,12 @@ i32 main(const i32 argc, cstr* argv){
                "-op, --opcodes                             Show list of all instructions.\n"
                "-i, --image [FILE]                         Boot VM using image file.\n"
                "-c, --compile [SOURCE_FILE] [OUT_FILE]     Compile assembly source files to machine code.\n"
                "-d, --debug                                Enable debug log.\n"
            );
            return 0;
        }
        else if(arg_is("-op") || arg_is("--opcodes")){
            for(u8 opcode = 0; opcode < 255; opcode++){
-                const Instruction* instr = Instruction_getByOpcode(opcode);
+                const Instruction* instr = Instruction_getFromOpcode(opcode);
                if(instr != NULL){
                    printf("%02X %s\n", opcode, instr->name);
                }
@ -75,9 +72,6 @@ i32 main(const i32 argc, cstr* argv){
            }
            out_file = argv[argi];
        }
        else if(arg_is("-d") || arg_is("--debug")){
            debug_log = true;
        }
        else {
            printfe("ERROR: unknown argument '%s'\n", argv[argi]);
            return 1;
@ -86,7 +80,7 @@ i32 main(const i32 argc, cstr* argv){
    i32 exit_code = 0;
    if(compile){
-        exit_code = compileSources(source_file, out_file, debug_log);
+        exit_code = compileSources(source_file, out_file);
    }
    if(exit_code == 0 && boot){
        exit_code = bootFromImage(image_file);
@ -137,20 +131,19 @@ i32 bootFromImage(cstr image_file){
    return exit_code;
 }
-i32 compileSources(cstr source_file, cstr out_file, bool debug_log){
+i32 compileSources(cstr source_file, cstr out_file){
    Compiler cmp;
    Compiler_init(&cmp);
-    bool success = Compiler_compile(&cmp, source_file, out_file, debug_log);
+    bool success = Compiler_compileTasm(&cmp, source_file, out_file, true);
    Compiler_free(&cmp);
    if(!success){
        if(cmp.error_message){
            printfe("COMPILER ERROR: %s\n", cmp.error_message);
            free(cmp.error_message);
        }
        else printfe("COMPILER ERROR: unknown (error_message is null)\n");
        Compiler_free(&cmp);
        return 111;
    }
    Compiler_free(&cmp);
    return 0;
 }
--- a/src/std.h
+++ b/src/std.h
@ -57,13 +57,3 @@ char* NULLABLE(vsprintf_malloc)(size_t buffer_size, cstr format, va_list argv);
 /// @return true if c lies in the range 'a'..'z'
 static inline bool isAlphabeticalLower(char c) {
    if(c < 'a')
        return false;
    return c <= 'z';
 }
 /// @return true if c lies in the range 'A'..'Z'
 static inline bool isAlphabeticalUpper(char c) {
    if(c < 'A')
        return false;
    return c <= 'Z';
 }
 /// @return true if c lies in the range '0'..'9'
 static inline bool isDigit(char c) {
    if(c < '0')
        return false;
    return c <= '9';
 }
 /// @brief search <src> for <fragment>, scanning forward from startIndex
 /// @param startIndex index in <src> where the scan begins (0 ... src length)
 /// @param seekLength maximum number of characters to scan (0 ... (u32)-1)
 /// @return pos of first <fragment> inclusion in <src> or -1 if not found
 i32 cstr_seek(const char* src, const char* fragment, u32 startIndex, u32 seekLength);
 /// @brief search <src> for <fragment>, scanning backward from startIndex
 /// @param startIndex index in <src> where the scan begins; (u32)-1 means the last character of <src>
 /// @param seekLength 0 ... (u32)-1
 /// @return pos of first <fragment> inclusion in <src> met by the backward scan, or -1 if not found
 i32 cstr_seekReverse(const char* src, const char* fragment, u32 startIndex, u32 seekLength);