source generation
This commit is contained in:
parent 832b0ced23
commit 0cf8a4e00e
2 kerep
@@ -1 +1 @@
Subproject commit a0458f6d2ef04ce30f5c951281cd7de91bd48cff
Subproject commit 2a9214fb795e97aa754ac46346310f94b7e61bfd
@@ -1,25 +1,80 @@
#include "cb2c.h"
#include "../../kerep/src/String/StringBuilder.h"

void namespaceToC(NamespaceContext* context, StringBuilder* b){
#define addc(C) StringBuilder_append_char(b, C);
#define adds(S) StringBuilder_append_cptr(b, S);

}
void appendToken(StringBuilder* b, Token tok, u16* _tab_count){
u16 tab_count=*_tab_count;
adds(tok.value)

void functionToC(FunctionContext* context, StringBuilder* b){

}

void classToC(ClassContext* context, StringBuilder* b){

}

char* contextToC(void* context){
StringBuilder* b=StringBuilder_create();
switch(((Context*)context)->type){
case CT_Namespace: namespaceToC(context, b); break;
case CT_Function: functionToC(context, b); break;
case CT_Class: classToC(context, b); break;
default: throw(ERR_WRONGTYPE);
switch(tok.id){
case tok_lbracket_fi:
tab_count++;
goto add_new_line;
case tok_rbracket_fi:
tab_count--;
goto add_new_line;
case tok_semicolon:
case tok_colon:
case tok_comment: ;
add_new_line:
addc('\n');
for(u16 i=0; i<tab_count; i++)
addc('\t');
break;
case tok_plus: case tok_minus: case tok_asterisk: case tok_slash:
case tok_assign: case tok_equal: case tok_not: case tok_not_equal:
case tok_and: case tok_and_d: case tok_or: case tok_or_d:
case tok_less: case tok_more: case tok_percent:
case tok_for: case tok_if: case tok_while: case tok_switch:
case tok_lbracket: case tok_rbracket: case tok_lbracket_sq: case tok_rbracket_sq:
case tok_break: case tok_backslash:
break;
default:
addc(' ');
}
return StringBuilder_build(b).ptr;

*_tab_count=tab_count;
}

Maybe appendNamespaceContext(StringBuilder* b, NamespaceContext* context){
adds("/* context ")
if(context->base.namespace!=NULL){
adds(context->base.namespace)
addc('.')
}
adds(context->base.name)
adds(" */\n\n")

u16 tab_count=0;
Autoarr_foreach(context->base.tokens, tok, appendToken(b, tok, &tab_count));
addc('\n');

return MaybeNull;
}

Maybe appendClassContext(StringBuilder* b, ClassContext* context){
safethrow(ERR_NOTIMPLEMENTED,;);
}

Maybe appendFunctionContext(StringBuilder* b, FunctionContext* context){
safethrow(ERR_NOTIMPLEMENTED,;);
}

Maybe translateToC(Context* context){
StringBuilder* b=StringBuilder_create();
switch(context->type){
case ContextType_Namespace:
try(appendNamespaceContext(b, (NamespaceContext*)context), _0, StringBuilder_free(b));
break;
case ContextType_Class:
try(appendClassContext(b, (ClassContext*)context), _2, StringBuilder_free(b));
break;
case ContextType_Function:
try(appendFunctionContext(b, (FunctionContext*)context), _1, StringBuilder_free(b));
break;
default: safethrow(ERR_WRONGTYPE, StringBuilder_free(b));
}
return SUCCESS(UniHeapPtr(char, StringBuilder_build(b).ptr));
}

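The new appendToken works as a one-pass pretty-printer: it prints the token text, bumps a tab counter on `{` (tok_lbracket_fi), drops it on `}` (tok_rbracket_fi), and emits a newline plus one tab per open scope after braces, semicolons, colons and comments. A minimal stand-alone sketch of the same technique, using plain characters instead of kerep tokens (emit_pretty is a hypothetical name, not part of this commit):

#include <stdio.h>

/* '{' indents, '}' outdents, ';' ends the line; everything is emitted
   in a single pass, mirroring the tab_count / goto add_new_line logic above */
void emit_pretty(const char* toks, FILE* out){
    unsigned tabs=0;
    for(const char* p=toks; *p; p++){
        fputc(*p, out);
        switch(*p){
            case '{': tabs++; goto new_line;
            case '}': if(tabs) tabs--; goto new_line;
            case ';':
            new_line:
                fputc('\n', out);
                for(unsigned i=0; i<tabs; i++) fputc('\t', out);
                break;
            default: break;
        }
    }
}

int main(void){
    emit_pretty("int f(){if(x){y;}z;}", stdout);
    return 0;
}
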
@@ -2,4 +2,7 @@

#include "../lexer/lexer.h"

char* contextToC(void* context);
/// @brief generates C source code from tokens
/// @param context contains code tokens
/// @return Maybe<char*> C source code
Maybe translateToC(Context* context);

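As the doc comment says, translateToC returns Maybe<char*>. For reference, this is how the new main.c consumes it; a condensed excerpt (names shortened; error handling follows the kerep tryLast convention, where value.VoidPtr carries the payload):

NamespaceContext file_context={ .base={
    .name="example", .namespace=NULL, .parent=NULL,
    .type=ContextType_Namespace, .tokens=tokens
}};
tryLast(translateToC(&file_context.base), m_c_code)  // returns early on error
char* generated_code=m_c_code.value.VoidPtr;         // the Maybe<char*> payload
kprintf("%s", generated_code);
free(generated_code);
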
158 src/cb2c/main.c
@@ -3,55 +3,151 @@
#include "cb2c.h"
#include "../lexer/lexer.h"

Autoarr(Token)* parseFile(char* filename){
tryLast(file_open(filename, FileOpenMode_Read), m_srcfile)
File* srcfile=m_srcfile.value.VoidPtr;
char* src;
tryLast(file_readAll(srcfile, &src), m_srcLen)
u64 srcLen=m_srcLen.value.UInt64;
kprintf("srclen: %lu\n", srcLen);
src[srcLen]='\0';
tryLast(lexan(src, filename), m_tokens)
Autoarr(Token)* tokens=m_tokens.value.VoidPtr;
return tokens;
}
char cb2c_version[]="0.0.1";

int main(const int argc, const char* const* argv){
if(!setlocale(LC_ALL, "C.UTF8"))
kprintf("\e[93msetlocale failed\n");
i32 main(const int argc, const char* const* argv){
if(setlocale(LC_CTYPE, "C.UTF-8")!=0)
kprintf("\e[93msetlocale failed\e[37m\n");

bool compile=0, translate=0;
char *compiler=NULL, *tranlation_out_dir=NULL;
Autoarr(Pointer)* source_files=Autoarr_create(Pointer, 64, 64);
bool compile=false;
char* compiler=NULL;
char* compilation_out_dir=NULL;
bool translate=false;
char* translation_traget=NULL; // single .c + single .h; namespace structured project
char* translation_out_dir=NULL;

// command arguments
if(argc==1){
kprintf("\e[91mno arguments\e[37m\n");
return -1;
}
for(int argi=1; argi<argc; argi++){
char* a=argv[argi];
kprintf("%s ", a);
kprintf("%s\n", a);

if(cptr_compare(a, "-v") || cptr_compare(a, "--version")){
kprintf(" cb2c v%s\n", cb2c_version);
return 0;
}

if(cptr_compare(a, "-c") || cptr_compare(a, "--compiler")){
compile=1;
compile=true;
kprintf(" compile=true\n");

if(argi+1>=argc || argv[argi+1][0]=='-')
throw("after argument --compiler must be compiler name");

compiler=argv[++argi];
kprintf(" compiler=%s\n", compiler);
}
else if(cptr_compare(a, "-t") || cptr_compare(a, "--tranlation-out-dir")){
translate=1;

else if(cptr_compare(a, "-t") || cptr_compare(a, "--translate")){
translate=true;
kprintf(" translate=true\n");

if(argi+1>=argc || argv[argi+1][0]=='-')
throw("after argument --translation-out-dir must be directory path");
tranlation_out_dir=argv[argi+1];
if(!dir_exists(tranlation_out_dir))
dir_create(tranlation_out_dir);

translation_traget=argv[++argi];
kprintf(" translation_traget=%s\n",translation_traget);
}

else if(cptr_compare(a, "-to") || cptr_compare(a, "--tranlation-out-dir")){
if(argi+1>=argc || argv[argi+1][0]=='-')
throw("after argument --translation-out-dir must be directory path");

translation_out_dir=argv[++argi];
kprintf(" translation_out_dir=%s\n", translation_out_dir);

if(!dir_exists(translation_out_dir)){
dir_create(translation_out_dir);
kprintf(" directory was created\n");
}
else kprintf(" directory exists\n");
}

else if(cptr_compare(a, "-o") || cptr_compare(a, "--compilation-out-dir")){
if(argi+1>=argc || argv[argi+1][0]=='-')
throw("after argument --compilation-out-dir must be directory path");

compilation_out_dir=argv[++argi];
kprintf(" compilation_out_dir=%s\n", compilation_out_dir);

if(!dir_exists(compilation_out_dir)){
dir_create(compilation_out_dir);
kprintf(" directory was created\n");
}
else kprintf(" directory exists\n");
}

else {
if(a[0]=='-'){
kprintf(" \e[91minvalid argument: %s\e[37m\n", a);
return -2;
}
if(file_exists(a)){
Autoarr_add(source_files, a);
kprintf(" found source file: %s\n", a);
}
else {
kprintf(" \e[91mfile not found: %s\e[37m\n", a);
return -3;
}
}
kprintf("\n"); return 0;
}

if(!compile && !translate){
kprintf("\e[91moperation not specified: select --translate and/or --compile\e[37m\n");
return -4;
}
if(Autoarr_length(source_files)==0){
kprintf("\e[91mno source files specified\e[37m\n");
return -5;
}

// kerep type system
kt_beginInit();
kt_initKerepTypes();
kt_initCbLexerTypes();
kt_initLexerTypes();
kt_endInit();
// keywords search tree
init_keywordsSearchTree();
kprint(kp_s|kp_fgGray, "keywordsSearchTree: ", kp_h|kp_pre, keywordsSearchTree, kp_c, '\n');
Autoarr(Token)* tokens=parseFile("src.cb");
kprint_setColor(kp_fgGray);

Lexer* lexer=Lexer_create();

Autoarr_foreach(source_files, src_file_name, ({
tryLast(Lexer_parseFile(lexer, src_file_name), m_tokens)
Autoarr(Token)* tokens=m_tokens.value.VoidPtr;
kprintf("tokens: %u\n", Autoarr_length(tokens));
Autoarr_foreach(tokens, tok, kprintf("%u %s\n",tok.id, tok.value));
STNode_free(keywordsSearchTree);
NamespaceContext file_context={
.base={
.name=path_basename(src_file_name, false),
.namespace=NULL,
.parent=NULL,
.type=ContextType_Namespace,
.tokens=tokens
}
};
tryLast(translateToC(&file_context.base), m_c_code)
char* generated_code=m_c_code.value.VoidPtr;
kprintf("%s", generated_code);

char* generated_file_name;
if(file_context.base.namespace!=NULL)
generated_file_name=cptr_concat(file_context.base.namespace,".",file_context.base.name,".g.c");
else generated_file_name=cptr_concat(file_context.base.name,".g.c");
tryLast(file_open(generated_file_name, FileOpenMode_Write), m_generated_file)
File* generated_file=m_generated_file.value.VoidPtr;
kprintf("created file %s\n", generated_file_name);
tryLast(file_writeCptr(generated_file, generated_code),_m_1885);
tryLast(file_close(generated_file),_m_14415);
kprintf("source code has been written to the file\n");
free(generated_code);
free(generated_file_name);
}));

Autoarr_free(source_files, true);
Lexer_destroy(lexer);
return 0;
}
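
Every value-taking flag above uses the same guard: peek at argv[argi+1], reject a missing value or another dash-prefixed flag, then consume it with argv[++argi]. A small portable sketch of that pattern in isolation (take_value is a hypothetical helper, not part of this commit):

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

/* consume the argument following a flag, rejecting absent values and other flags */
static const char* take_value(int argc, char** argv, int* argi, const char* flag){
    if(*argi+1>=argc || argv[*argi+1][0]=='-'){
        fprintf(stderr, "after argument %s must be a value\n", flag);
        exit(1);
    }
    return argv[++*argi];
}

int main(int argc, char** argv){
    const char* out_dir=NULL;
    for(int argi=1; argi<argc; argi++)
        if(!strcmp(argv[argi], "-to") || !strcmp(argv[argi], "--translation-out-dir"))
            out_dir=take_value(argc, argv, &argi, argv[argi]);
    if(out_dir) printf("out dir: %s\n", out_dir);
    return 0;
}
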
184 src/lexer/analize.c Normal file
@@ -0,0 +1,184 @@
#include "lexer.h"

Maybe _throw_wrongchar(Lexer* lex){
char* _errline=string_extract(lex->line);
char* _context=string_extract(lex->context);
printf("\n\e[91mwrong char <%c> at [%s:%u:%u %s]\n >>> %s\n",
lex->source[lex->charnum], lex->filename,lex->linenum,lex->charnum,_context, _errline);
exit(96);
}
#define throw_wrongchar(freeMem) { freeMem; return _throw_wrongchar(lex); }

// adds <lex->label> to <lex->tokens> as tok_label or tok_number
void _tryAddLabel(Lexer* lex){
if(lex->label.length==0) return;

Unitype uni=ST_pullString(lex->keywordSearchTree, lex->label);
if(uni.VoidPtr!=NULL) // built-in keyword
Autoarr_add(lex->tokens, *(Token*)uni.VoidPtr);
else { // user-defined lex->label
Token ut;
ut.value=string_extract(lex->label);
ut.on_heap=true;
switch(*lex->label.ptr){
case '0': case '1': case '2': case '3': case '4':
case '5': case '6': case '7': case '8': case '9':
ut.id=tok_number;
break;
default:
ut.id=tok_label;
break;
}
Autoarr_add(lex->tokens, ut);
}

lex->label=(string){lex->source, 0};
};
#define tryAddLabel() _tryAddLabel(lex)

#define addDefTok(id) { tryAddLabel(); Autoarr_add(lex->tokens, default_tokens[id]); }

void _addDefTok_ifnext(Lexer* lex, char next, TokenId yes, TokenId no){
if(*(lex->source+1)==next){
lex->source++;
addDefTok(yes);
}
else addDefTok(no);
}
#define addDefTok_ifnext(nextChar, yesTok, noTok) _addDefTok_ifnext(lex, nextChar, yesTok, noTok)

// returns text in quotes or error
Maybe _readString(Lexer* lex, char quotChar){
char c;
bool prevIsBackslash=false;
char* srcFirst=lex->source;
while ((c=*++lex->source)){
if(c==quotChar) {
if(prevIsBackslash) // \"
prevIsBackslash=false;
else { // "
string str={srcFirst, lex->source-srcFirst+1};
char* extracted=string_extract(str);
return SUCCESS(UniHeapPtr(char, extracted));
}
}
else prevIsBackslash= c=='\\' && !prevIsBackslash;
}
lex->source=srcFirst;
throw_wrongchar(;);
}
#define readString(quotChar) _readString(lex, quotChar)


Maybe __Lexer_analize(Lexer* lex, char* _filename, char* _source){
lex->filename=_filename;
lex->source=_source;
lex->tokens=Autoarr_create(Token, 64, 1024);
lex->label=(string){_source, 0};
lex->line=(string){_source, 0};
lex->linenum=0;
lex->charnum=0;

char c;
lex->source--;
while ((c=*++(lex->source))) {
// tryAddLabel() resets lex->label.length, but lex->label.ptr still pointing to prev token beginning
if(lex->label.length==0)
lex->label.ptr=lex->source;

switch(c){
case ' ': case '\t':
case '\r': case '\n':
tryAddLabel();
break;

case '(': addDefTok(tok_lbracket); break;
case '{': addDefTok(tok_lbracket_fi); break;
case '[': addDefTok(tok_lbracket_sq); break;
case ')': addDefTok(tok_rbracket); break;
case '}': addDefTok(tok_rbracket_fi); break;
case ']': addDefTok(tok_rbracket_sq); break;

case '\'':
tryAddLabel();
try(readString('\''), maybeC, ;){
Token ctok={
.id=tok_character,
.value=(char*)maybeC.value.VoidPtr
};
Autoarr_add(lex->tokens, ctok);
}
break;
case '"':
tryAddLabel();
try(readString('"'), maybeS, ;){
Token stok={
.id=tok_string,
.value=(char*)maybeS.value.VoidPtr
};
Autoarr_add(lex->tokens, stok);
}
break;

case '<': addDefTok(tok_less); break;
case '>': addDefTok(tok_more); break;
case '+': addDefTok(tok_plus); break;
case '-': addDefTok(tok_minus); break;
case '*': addDefTok(tok_asterisk); break;

case '/':
tryAddLabel();
string commentStr={lex->source, 1};
c=*++lex->source;
if(c=='/') { // single-lex->line comment
commentStr.length++;
while((c=*++lex->source)){
if(c=='\n' || c=='\r') break;
else commentStr.length++;
}
}
else if(c=='*') { // multi-lex->line comment
commentStr.length++; // first slash
while((c=*++lex->source)){
commentStr.length++;
if(c=='*' && *(++lex->source)=='/') break;
}
commentStr.length++; // last slash
}
else { // not comment
lex->source--;
addDefTok(tok_slash);
break;
}
Token comTok={
.value=string_extract(commentStr),
.id=tok_comment
};
Autoarr_add(lex->tokens, comTok);
break;

case '=': addDefTok_ifnext('=', tok_equal, tok_assign); break;
case '!': addDefTok_ifnext('=', tok_not_equal, tok_not); break;
case '&': addDefTok_ifnext('&', tok_and_d, tok_and); break;
case '|': addDefTok_ifnext('|', tok_or_d, tok_or); break;
case '?': addDefTok_ifnext('?', tok_question_d, tok_question);break;
case ':': addDefTok(tok_colon); break;
case ';': addDefTok(tok_semicolon); break;
case '.': addDefTok(tok_point); break;
case ',': addDefTok(tok_comma); break;
case '~': addDefTok(tok_tilda); break;
case '\\':addDefTok(tok_backslash); break;
case '%': addDefTok(tok_percent); break;
case '^': addDefTok(tok_xor); break;
case '$': addDefTok(tok_dollar); break;
case '@': addDefTok(tok_at); break;

default:
lex->label.length++;
break;
}
}

tryAddLabel();
return SUCCESS(UniHeapPtr(Autoarr(Token), lex->tokens));
}
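
A note on the scanning strategy: the lexer never copies characters while it walks the source. lex->label is a {ptr, length} view into the source buffer; the default case only grows the length, and string_extract copies the slice out once a delimiter finalizes it. A stand-alone sketch of this zero-copy slice technique, with hypothetical names (strview, extract) standing in for kerep's string API:

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

typedef struct { const char* ptr; size_t length; } strview;  // a view, not an owner

static char* extract(strview v){        // copy the slice into a fresh C string
    char* s=malloc(v.length+1);
    memcpy(s, v.ptr, v.length);
    s[v.length]='\0';
    return s;
}

int main(void){
    const char* src="foo bar";
    strview label={src, 0};
    for(const char* p=src; ; p++){
        if(*p==' ' || *p=='\0'){
            if(label.length){                // delimiter hit: materialize the token
                char* word=extract(label);
                printf("token: %s\n", word);
                free(word);
            }
            if(!*p) break;
            label=(strview){p+1, 0};         // restart the view after the delimiter
        }
        else label.length++;                 // grow the view; no copying yet
    }
    return 0;
}
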
@@ -5,15 +5,15 @@


PACKED_ENUM(ContextType,
CT_Namespace,
CT_Class,
CT_Function
ContextType_Namespace,
ContextType_Class,
ContextType_Function
)

typedef struct Context Context;
STRUCT(Context,
char* name;
Context* parent;
char* namespace; /* nullable */
Context* parent; /* nullable */
Autoarr(Token)* tokens;
ContextType type;
)

@@ -1,6 +1,6 @@
#include "lexer.h"

void kt_initCbLexerTypes(){
void kt_initLexerTypes(){
kt_register(Token);
kt_register(Autoarr_Token);
}

@@ -1,207 +1,43 @@
#include "lexer.h"
#include "../../kerep/src/String/string.h"
#include "../../kerep/src/Filesystem/filesystem.h"

STRUCT(SharedLexerData,
char* _source;
char* _filename;
Autoarr(Token)* _tokens;
string _context;
string _line;
string _label;
u32 _linenum;
u32 _charnum;
)
Maybe __Lexer_analize(Lexer* lex, char* _filename, char* _source);

#define source sld->_source
#define filename sld->_filename
#define tokens sld->_tokens
#define context sld->_context
#define line sld->_line
#define label sld->_label
#define linenum sld->_linenum
#define charnum sld->_charnum

Maybe _throw_wrongchar(SharedLexerData* sld){
char* errline=string_extract(line);
char* _context=string_extract(context);
printf("\n\e[91mwrong char <%c> at [%s:%u:%u %s]\n >>> %s\n",
source[charnum], filename,linenum,charnum,_context, errline);
exit(96);
Lexer* Lexer_create(){
Lexer* lex=malloc(sizeof(*lex));
*lex=(Lexer){0};
lex->analize=__Lexer_analize;
lex->keywordSearchTree=STNode_create();
for(TokenId keywordId=0; keywordId<=tok_typeof; keywordId++){
const Token* keywordptr=&default_tokens[keywordId];
Unitype uni=UniStackPtr(Token, keywordptr);
ST_push(lex->keywordSearchTree, keywordptr->value, uni);
}
return lex;
}
#define throw_wrongchar(freeMem) { freeMem; return _throw_wrongchar(sld); }

// adds <label> to <tokens> as tok_label or tok_number
void _tryAddLabel(SharedLexerData* sld){
if(label.length==0) return;

Unitype uni=ST_pullString(keywordsSearchTree, label);
if(uni.VoidPtr!=NULL) // built-in keyword
Autoarr_add(tokens, *(Token*)uni.VoidPtr);
else { // user-defined label
Token ut;
ut.value=string_extract(label);
switch(*label.ptr){
case '0': case '1': case '2': case '3': case '4':
case '5': case '6': case '7': case '8': case '9':
ut.id=tok_number;
break;
default:
ut.id=tok_label;
break;
}
Autoarr_add(tokens, ut);
}

label=(string){source, 0};
};
#define tryAddLabel() _tryAddLabel(sld)

#define addDefTok(id) tryAddLabel(); Autoarr_add(tokens, default_tokens[id])

void _addDefTok_ifnext(char next, TokenId yes, TokenId no, SharedLexerData* sld){
if(*(source+1)==next){
source++;
addDefTok(yes);
}
else addDefTok(no);
void Lexer_freeMembers(void* _l){
Lexer* lex=(Lexer*)_l;
STNode_free(lex->keywordSearchTree);
}
#define addDefTok_ifnext(nextChar, yesTok, noTok) _addDefTok_ifnext(nextChar, yesTok, noTok, sld)

// returns text in quotes or error
Maybe _readString(char quotChar, SharedLexerData* sld){
char c;
bool prevIsBackslash=false;
char* srcFirst=source;
while ((c=*++source)){
if(c==quotChar) {
if(prevIsBackslash) // \"
prevIsBackslash=false;
else { // "
string str={srcFirst, source-srcFirst+1};
char* extracted=string_extract(str);
return SUCCESS(UniHeapPtr(char, extracted));
}
}
else prevIsBackslash= c=='\\' && !prevIsBackslash;
}
source=srcFirst;
throw_wrongchar(;);
kt_define(Lexer, Lexer_freeMembers, NULL)

void Lexer_destroy(Lexer* lex){
Lexer_freeMembers(lex);
free(lex);
}
#define readString(quotChar) _readString(quotChar, sld)


Maybe _lexan(SharedLexerData* sld){
char c;
source--;
while ((c=*++source)) {
// tryAddLabel() resets label.length, but label.ptr still pointing to prev token beginning
if(label.length==0)
label.ptr=source;

switch(c){
case ' ': case '\t':
case '\r': case '\n':
tryAddLabel();
break;

case '(': addDefTok(tok_lbracket); break;
case '{': addDefTok(tok_lbracket_fi); break;
case '[': addDefTok(tok_lbracket_sq); break;
case ')': addDefTok(tok_rbracket); break;
case '}': addDefTok(tok_rbracket_fi); break;
case ']': addDefTok(tok_rbracket_sq); break;

case '\'':
tryAddLabel();
try(readString('\''), maybeC, ;){
Token ctok={
.id=tok_character,
.value=(char*)maybeC.value.VoidPtr
};
Autoarr_add(tokens, ctok);
}
break;
case '"':
tryAddLabel();
try(readString('"'), maybeS, ;){
Token stok={
.id=tok_string,
.value=(char*)maybeS.value.VoidPtr
};
Autoarr_add(tokens, stok);
}
break;

case '<': addDefTok(tok_less); break;
case '>': addDefTok(tok_more); break;
case '+': addDefTok(tok_plus); break;
case '-': addDefTok(tok_minus); break;
case '*': addDefTok(tok_asterisk); break;

case '/':
tryAddLabel();
string commentStr={source, 1};
c=*++source;
if(c=='/') { // single-line comment
commentStr.length++;
while((c=*++source)){
if(c=='\n' || c=='\r') break;
else commentStr.length++;
}
}
else if(c=='*') { // multi-line comment
commentStr.length++;
while((c=*++source)){
commentStr.length++;
if(c=='*' && *(++source)=='/') break;
}
}
else { // not comment
source--;
addDefTok(tok_slash);
break;
}
Token comTok={
.value=string_extract(commentStr),
.id=tok_comment
};
Autoarr_add(tokens, comTok);
break;

case '=': addDefTok_ifnext('=', tok_equals, tok_assign); break;
case '!': addDefTok_ifnext('=', tok_not_equals, tok_not); break;
case '&': addDefTok_ifnext('&', tok_and_d, tok_and); break;
case '|': addDefTok_ifnext('|', tok_or_d, tok_or); break;
case '?': addDefTok_ifnext('?', tok_question_d, tok_question);break;
case ':': addDefTok(tok_colon); break;
case ';': addDefTok(tok_semicolon); break;
case '.': addDefTok(tok_point); break;
case ',': addDefTok(tok_comma); break;
case '~': addDefTok(tok_tilda); break;
case '\\': addDefTok(tok_backslash); break;
case '%': addDefTok(tok_percent); break;
case '^': addDefTok(tok_xor); break;
case '$': addDefTok(tok_dollar); break;
case '@': addDefTok(tok_at); break;

default:
label.length++;
break;
}
}
///@return Maybe<Autoarr(Token)*>
Maybe Lexer_parseFile(Lexer* lex, char* src_file_name){
try(file_open(src_file_name, FileOpenMode_Read), m_src_file,;)
File* src_file=m_src_file.value.VoidPtr;
char* src_text;
try(file_readAll(src_file, &src_text), m_src_len, file_close(src_file))
u64 src_len=m_src_len.value.UInt64;
kprintf("srclen: %lu\n", src_len);
try(Lexer_parseText(lex, src_file_name, src_text), m_tokens, file_close(src_file))
Autoarr(Token)* tokens=m_tokens.value.VoidPtr;
return SUCCESS(UniHeapPtr(Autoarr(Token), tokens));
}


Maybe lexan(char* _source, char* _filename){
SharedLexerData sld={
._source=_source,
._filename=_filename,
._tokens=Autoarr_create(Token, 64, 1024),
._label={_source, 0},
._line={_source, 0},
._linenum=0,
._charnum=0
};
return _lexan(&sld);
}
@@ -3,7 +3,30 @@
#include "tokens.h"
#include "context.h"

//Autoarr(Token)*
Maybe lexan(char* source, char* filename);
void kt_initLexerTypes();

void kt_initCbLexerTypes();
STRUCT(Lexer,
STNode* keywordSearchTree;
Maybe (*analize)(Lexer* lex, char* src_file_name, char* src_file_text);

char* source;
char* filename;
Autoarr(Token)* tokens;
string context;
string line;
string label;
u32 linenum;
u32 charnum;
)

Lexer* Lexer_create();
void Lexer_destroy(Lexer* lex);
///@return Maybe<Autoarr(Token)*>

///@return Maybe<Autoarr(Token)*>
Maybe Lexer_parseFile(Lexer* lex, char* src_file_name);

///@return Maybe<Autoarr(Token)*>
static inline Maybe Lexer_parseText(Lexer* lex, char* src_file_name, char* src_file_text){
return lex->analize(lex, src_file_name, src_file_text);
}

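For reference, the lifecycle this header implies (create once, parse any number of files, destroy) is exactly what the new main.c does; a condensed excerpt under the kerep tryLast convention:

Lexer* lexer=Lexer_create();                        // builds the per-lexer keyword tree
tryLast(Lexer_parseFile(lexer, "src.cb"), m_tokens) // Maybe<Autoarr(Token)*>
Autoarr(Token)* tokens=m_tokens.value.VoidPtr;
kprintf("tokens: %u\n", Autoarr_length(tokens));
Lexer_destroy(lexer);                               // frees the tree, then the lexer
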
@@ -12,14 +12,3 @@ char* Token_toString(void* _t, u32 fmt){

kt_define(Token, Token_freeMembers, Token_toString);
Autoarr_define(Token, false)


STNode* keywordsSearchTree=NULL;
void init_keywordsSearchTree(){
keywordsSearchTree=STNode_create();
for(TokenId keywordId=0; keywordId<=tok_typeof; keywordId++){
const Token* keywordptr=&default_tokens[keywordId];
Unitype uni=UniStackPtr(Token, keywordptr);
ST_push(keywordsSearchTree, keywordptr->value, uni);
}
}

@@ -31,8 +31,8 @@ PACKED_ENUM(TokenId,
tok_while,
tok_switch,
tok_case,
tok_brake,
tok_braketo,
tok_break,
tok_breakto,
tok_return,
tok_goto,
/* declaration keywords */
@@ -72,8 +72,8 @@ PACKED_ENUM(TokenId,
tok_slash, /* / */
tok_assign, /* = */
tok_not, /* ! */
tok_equals, /* == */
tok_not_equals, /* != */
tok_equal, /* == */
tok_not_equal, /* != */
tok_and, /* & */
tok_and_d, /* && */
tok_or, /* | */
@@ -106,97 +106,92 @@ STRUCT(Token,
)
Autoarr_declare(Token)

extern STNode* keywordsSearchTree;
// dont forget to free it
void init_keywordsSearchTree();


static const Token default_tokens[]={
/* base types */
{"void", tok_void_t},
{"i8", tok_i8_t},
{"u8", tok_u8_t},
{"i16", tok_i16_t},
{"u16", tok_u16_t},
{"i32", tok_i32_t},
{"u32", tok_u32_t},
{"i64", tok_i64_t},
{"u64", tok_u64_t},
{"char", tok_char_t},
{"char8", tok_char8_t},
{"char16", tok_char16_t},
{"bool", tok_bool_t},
{"f32", tok_f32_t},
{"f64", tok_f64_t},
{"void", tok_void_t, 0},
{"i8", tok_i8_t, 0},
{"u8", tok_u8_t, 0},
{"i16", tok_i16_t, 0},
{"u16", tok_u16_t, 0},
{"i32", tok_i32_t, 0},
{"u32", tok_u32_t, 0},
{"i64", tok_i64_t, 0},
{"u64", tok_u64_t, 0},
{"char", tok_char_t, 0},
{"char8", tok_char8_t, 0},
{"char16", tok_char16_t, 0},
{"bool", tok_bool_t, 0},
{"f32", tok_f32_t, 0},
{"f64", tok_f64_t, 0},
/* constant */
{"null", tok_null},
{"true", tok_true},
{"false", tok_false},
{"null", tok_null, 0},
{"true", tok_true, 0},
{"false", tok_false, 0},
/* control flow operators */
{"if", tok_if},
{"else", tok_else},
{"for", tok_for},
{"while", tok_while},
{"switch", tok_switch},
{"case", tok_case},
{"brake", tok_brake},
{"braketo", tok_braketo},
{"return", tok_return},
{"goto", tok_goto},
{"if", tok_if, 0},
{"else", tok_else, 0},
{"for", tok_for, 0},
{"while", tok_while, 0},
{"switch", tok_switch, 0},
{"case", tok_case, 0},
{"brake", tok_break, 0},
{"braketo", tok_breakto, 0},
{"return", tok_return, 0},
{"goto", tok_goto, 0},
/* declaration keywords */
{"class", tok_class},
{"struct", tok_struct},
{"enum", tok_enum},
{"union", tok_union},
{"event", tok_event},
{"import", tok_import},
{"alias", tok_alias},
{"class", tok_class, 0},
{"struct", tok_struct, 0},
{"enum", tok_enum, 0},
{"union", tok_union, 0},
{"event", tok_event, 0},
{"import", tok_import, 0},
{"alias", tok_alias, 0},
/* access modifiers */
{"static", tok_static},
{"const", tok_const},
{"readonly", tok_readonly},
{"protected", tok_protected},
{"internal", tok_internal},
{"public", tok_public},
{"virtual", tok_virtual},
{"override", tok_override},
{"partial", tok_partial},
{"static", tok_static, 0},
{"const", tok_const, 0},
{"readonly", tok_readonly, 0},
{"protected", tok_protected, 0},
{"internal", tok_internal, 0},
{"public", tok_public, 0},
{"virtual", tok_virtual, 0},
{"override", tok_override, 0},
{"partial", tok_partial, 0},
/* allocation keywords */
{"new", tok_new},
{"sizeof", tok_sizeof},
{"typeof", tok_typeof},
{"new", tok_new, 0},
{"sizeof", tok_sizeof, 0},
{"typeof", tok_typeof, 0},
/* special characters */
{"(", tok_lbracket},
{"{", tok_lbracket_fi},
{"[", tok_lbracket_sq},
{")", tok_rbracket},
{"}", tok_rbracket_fi},
{"]", tok_rbracket_sq},
{"<", tok_less},
{">", tok_more},
{"+", tok_plus},
{"-", tok_minus},
{"*", tok_asterisk},
{"/", tok_slash},
{"=", tok_assign},
{"!", tok_not},
{"==", tok_equals},
{"!=", tok_not_equals},
{"&", tok_and},
{"&&", tok_and_d},
{"|", tok_or},
{"||", tok_or_d},
{"?", tok_question},
{"??", tok_question_d},
{":", tok_colon},
{";", tok_semicolon},
{".", tok_point},
{",", tok_comma},
{"~", tok_tilda},
{"\\", tok_backslash},
{"%", tok_percent},
{"^", tok_xor},
{"#", tok_lattice},
{"$", tok_dollar},
{"@", tok_at}
{"(", tok_lbracket, 0},
{"{", tok_lbracket_fi, 0},
{"[", tok_lbracket_sq, 0},
{")", tok_rbracket, 0},
{"}", tok_rbracket_fi, 0},
{"]", tok_rbracket_sq, 0},
{"<", tok_less, 0},
{">", tok_more, 0},
{"+", tok_plus, 0},
{"-", tok_minus, 0},
{"*", tok_asterisk, 0},
{"/", tok_slash, 0},
{"=", tok_assign, 0},
{"!", tok_not, 0},
{"==", tok_equal, 0},
{"!=", tok_not_equal, 0},
{"&", tok_and, 0},
{"&&", tok_and_d, 0},
{"|", tok_or, 0},
{"||", tok_or_d, 0},
{"?", tok_question, 0},
{"??", tok_question_d, 0},
{":", tok_colon, 0},
{";", tok_semicolon, 0},
{".", tok_point, 0},
{",", tok_comma, 0},
{"~", tok_tilda, 0},
{"\\", tok_backslash, 0},
{"%", tok_percent, 0},
{"^", tok_xor, 0},
{"#", tok_lattice, 0},
{"$", tok_dollar, 0},
{"@", tok_at, 0}
};

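The trailing `, 0` added to every initializer matches the on_heap flag that analize.c sets on user-defined tokens (ut.on_heap=true). Read together, the two hunks imply roughly this Token layout; an inference from this diff, not the verbatim tokens.h definition:

STRUCT(Token,
    char* value;   /* token text; a string literal for default_tokens entries */
    TokenId id;
    bool on_heap;  /* true only when value was string_extract'ed and must be freed */
)

Keeping the flag zero for the static table lets Token_freeMembers free user tokens' heap-allocated strings without ever touching the keyword literals.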