Timerix22 2022-07-21 01:18:55 +03:00
parent ae8e5c92f9
commit 6098820644
6 changed files with 289 additions and 262 deletions

src/copler.c

@@ -1,6 +1,9 @@
 #include "copler.h"
+#include "../lexer/lexer.h"
 
 int main(){
+    init_keywordsSearchTree();
+    STNode_free(keywordsSearchTree);
     return 0;
 }
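Note: the new main() only exercises the keyword-tree lifecycle so far. A minimal sketch of how the pieces are meant to compose once the lexer result is consumed — the try/Maybe unwrapping mirrors lexer.c, while Autoarr_length, Autoarr_get and Autoarr_free are assumed kerep accessor names that this diff does not confirm:

#include "copler.h"
#include "../lexer/lexer.h"

int main(){
    init_keywordsSearchTree();                  // build the keyword tree once
    try(lexan("u32 a = 42;", "<memory>"), m, ;){ // lex an in-memory buffer
        Autoarr(Token)* toks=(Autoarr(Token)*)m.value.VoidPtr;
        for(uint32 i=0; i<Autoarr_length(toks); i++){ // assumed accessors
            Token t=Autoarr_get(toks,i);
            printf("%d %s\n", (int)t.id, t.value);
        }
        Autoarr_free(toks);                     // assumed destructor name
    }
    STNode_free(keywordsSearchTree);            // release the keyword tree
    return 0;
}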

src/lexer/lexer.c

@@ -1,8 +1,7 @@
 #include "lexer.h"
-#include "../../../kerep/src/String/string.h"
 
-Autoarr_define(Token);
-typedef struct SourceFileInfo{
+typedef struct SharedLexerData{
     char* _source;
     char* _filename;
     Autoarr(Token)* _tokens;
@@ -11,42 +10,257 @@ typedef struct SourceFileInfo{
     string _label;
     uint32 _linenum;
     uint32 _charnum;
-} SourceFileInfo;
+} SharedLexerData;
 
-#define source sfi->_source
-#define filename sfi->_filename
-#define tokens sfi->_tokens
-#define context sfi->_context
-#define line sfi->_line
-#define label sfi->_label
-#define linenum sfi->_linenum
-#define charnum sfi->_charnum
+#define source sld->_source
+#define filename sld->_filename
+#define tokens sld->_tokens
+#define context sld->_context
+#define line sld->_line
+#define label sld->_label
+#define linenum sld->_linenum
+#define charnum sld->_charnum
 
-Maybe _throw_wrongchar(SourceFileInfo* sfi){
+Maybe _throw_wrongchar(SharedLexerData* sld){
     char* errline=string_extract(line);
+    char* _context=string_extract(context);
     printf("\n\e[91mwrong char <%c> at [%s:%u:%u %s]\n >>> %s\n",
         source[charnum], filename,linenum,charnum,context, errline);
     exit(96);
 }
-#define throw_wrongchar(freeMem) { freeMem; _throw_wrongchar(sfi); }
+#define throw_wrongchar(freeMem) { freeMem; return _throw_wrongchar(sld); }
 
-char _charFromEscapeStr(string estr, SourceFileInfo* sfi){
-    if(*estr.ptr!='\\') throw("first char is not \\");
-    switch(*(++estr.ptr)){
-        case 'n': return '\n';
-        case 'r': return '\r';
-        case 't': return '\t';
-        case 'e': return '\e';
-        case '\\': return '\\';
-        case '"': return '"';
-        case '\'': return '\'';
-        default: throw_wrongchar(;);
-    }
-}
-#define charFromEscapeStr(estr) _charFromEscapeStr(estr, sfi)
+#define addTok(id) Autoarr_add(tokens, default_tokens[id])
+
+void _addTok_ifnext(char next, TokenId yes, TokenId no, SharedLexerData* sld){
+    if(*(++source)==next){
+        addTok(yes);
+    }
+    else {
+        source--;
+        addTok(no);
+    }
+}
+#define addTok_ifnext(nextChar, yesTok, noTok) _addTok_ifnext(nextChar, yesTok, noTok, sld)
+
+// adds <label> to <tokens> as tok_label or tok_number
+void _tryAddLabel(SharedLexerData* sld){
+    if(label.length==0) return;
+    Unitype fake_uni=ST_pullString(keywordsSearchTree,label);
+    if(fake_uni.VoidPtr!=NULL) // built-in keyword
+        Autoarr_add(tokens, *(Token*)(void*)&fake_uni);
+    else { // user-defined label
+        Token ut;
+        ut.value=string_extract(label);
+        switch(*label.ptr){
+            case '0': case '1': case '2': case '3': case '4':
+            case '5': case '6': case '7': case '8': case '9':
+                ut.id=tok_number;
+                break;
+            default:
+                ut.id=tok_label;
+                break;
+        }
+        Autoarr_add(tokens, ut);
+    }
+    label=(string){source,0};
+};
+#define tryAddLabel() _tryAddLabel(sld)
+
+// returns text in quotes or error
+Maybe _readString(char quotChar, SharedLexerData* sld){
+    char c;
+    bool prevIsBackslash=false;
+    char* srcFirst=source;
+    while ((c=*++source)){
+        if(c==quotChar) {
+            if(prevIsBackslash) // \"
+                prevIsBackslash=false;
+            else { // "
+                string str={srcFirst, source-srcFirst+1};
+                char* extracted=string_extract(str);
+                return SUCCESS(UniPtr(CharPtr, extracted));
+            }
+        }
+        else prevIsBackslash= c=='\\' && !prevIsBackslash;
+    }
+    source=srcFirst;
+    throw_wrongchar(;);
+}
+#define readString(quotChar) _readString(quotChar, sld)
+
+Maybe _lexan(SharedLexerData* sld){
+    char c;
+    source--;
+    while ((c=*++source)) switch(c){
+        case ' ': case '\t':
+        case '\r': case '\n':
+            tryAddLabel();
+            break;
+        case '(':
+            tryAddLabel();
+            addTok(tok_lbracket);
+            break;
+        case '{':
+            tryAddLabel();
+            addTok(tok_lbracket_fi);
+            break;
+        case '[':
+            tryAddLabel();
+            addTok(tok_lbracket_sq);
+            break;
+        case ')':
+            tryAddLabel();
+            addTok(tok_rbracket);
+            break;
+        case '}':
+            tryAddLabel();
+            addTok(tok_rbracket_fi);
+            break;
+        case ']':
+            tryAddLabel();
+            addTok(tok_rbracket_sq);
+            break;
+        case '\'':
+            tryAddLabel();
+            try(readString('\''), maybeC, ;){
+                Token ctok={
+                    .id=tok_character,
+                    .value=(char*)maybeC.value.VoidPtr
+                };
+                Autoarr_add(tokens, ctok);
+            }
+            break;
+        case '"':
+            tryAddLabel();
+            try(readString('"'), maybeS, ;){
+                Token stok={
+                    .id=tok_string,
+                    .value=(char*)maybeS.value.VoidPtr
+                };
+                Autoarr_add(tokens, stok);
+            }
+            break;
+        case '<':
+            tryAddLabel();
+            addTok(tok_less);
+            break;
+        case '>':
+            tryAddLabel();
+            addTok(tok_more);
+            break;
+        case '+':
+            tryAddLabel();
+            addTok(tok_plus);
+            break;
+        case '-':
+            tryAddLabel();
+            addTok(tok_minus);
+            break;
+        case '*':
+            tryAddLabel();
+            addTok(tok_asterisk);
+            break;
+        case '/':
+            tryAddLabel();
+            string commentStr={source,0};
+            c=*++source;
+            if(c=='/') { // single-line comment
+                while((c=*++source)){
+                    if(c=='\n' || c=='\r') break;
+                    else commentStr.length++;
+                }
+            }
+            else if(c=='*') { // multi-line comment
+                while((c=*++source)){
+                    commentStr.length++;
+                    if(c=='*' && *(++source)=='/') break;
+                }
+            }
+            else { // not comment
+                source--;
+                addTok(tok_slash);
+                break;
+            }
+            Token comTok={
+                .value=string_extract(commentStr),
+                .id=tok_comment
+            };
+            Autoarr_add(tokens,comTok);
+            break;
+        case '=':
+            tryAddLabel();
+            addTok_ifnext('=',tok_equals,tok_assign);
+            break;
+        case '!':
+            tryAddLabel();
+            addTok_ifnext('=',tok_not_equals,tok_not);
+            break;
+        case '&':
+            tryAddLabel();
+            addTok_ifnext('&',tok_and_d,tok_and);
+            break;
+        case '|':
+            tryAddLabel();
+            addTok_ifnext('|',tok_or_d,tok_or);
+            break;
+        case '?':
+            tryAddLabel();
+            addTok_ifnext('?',tok_question_d,tok_question);
+            break;
+        case ':':
+            tryAddLabel();
+            addTok(tok_colon);
+            break;
+        case ';':
+            tryAddLabel();
+            addTok(tok_semicolon);
+            break;
+        case '.':
+            tryAddLabel();
+            addTok(tok_point);
+            break;
+        case ',':
+            tryAddLabel();
+            addTok(tok_comma);
+            break;
+        case '~':
+            tryAddLabel();
+            addTok(tok_tilda);
+            break;
+        case '\\':
+            tryAddLabel();
+            addTok(tok_backslash);
+            break;
+        case '%':
+            tryAddLabel();
+            addTok(tok_percent);
+            break;
+        case '^':
+            tryAddLabel();
+            addTok(tok_xor);
+            break;
+        case '$':
+            tryAddLabel();
+            addTok(tok_dollar);
+            break;
+        case '@':
+            tryAddLabel();
+            addTok(tok_at);
+            break;
+        default:
+            label.length++;
+            break;
+    }
+    return SUCCESS(UniPtr(AutoarrTokenPtr,tokens));
+}
+
-Autoarr(Token)* lexan(char* _source, char* _filename){
-    SourceFileInfo sfi={
+Maybe lexan(char* _source, char* _filename){
+    SharedLexerData sld={
         ._source=_source,
         ._filename=_filename,
         ._tokens=Autoarr_create(Token,64,1024),
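Note on _readString above: a closing quote only ends the literal when the previous character was not an unescaped backslash, tracked by the single prevIsBackslash flag (two backslashes cancel out). The same flag logic as a self-contained, hypothetical helper, independent of SharedLexerData:

#include <stdio.h>
#include <stdbool.h>

// returns a pointer to the closing quote, or NULL if the literal never ends;
// same prevIsBackslash bookkeeping as _readString in lexer.c
static const char* find_closing_quote(const char* s, char quotChar){
    bool prevIsBackslash=false;
    for(char c; (c=*++s);){ // s points at the opening quote on entry
        if(c==quotChar && !prevIsBackslash) return s;
        prevIsBackslash = c=='\\' && !prevIsBackslash;
    }
    return NULL;
}

int main(){
    printf("%s\n", find_closing_quote("\"a\\\"b\"", '"')); // \" is escaped -> prints the final quote
    printf("%s\n", find_closing_quote("\"a\\\\\"x", '"')); // \\ cancels itself -> prints "x
    return 0;
}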
@@ -55,220 +269,5 @@ Autoarr(Token)* lexan(char* _source, char* _filename){
         ._linenum=0,
         ._charnum=0
     };
-    return _lexan(&sfi);
+    return _lexan(&sld);
 }
-
-Autoarr(Token)* _lexan(SourceFileInfo* sfi){
-    void addlabel(){
-        if(label.length==0) return;
-        Unitype token_ptr_u=ST_pull_str(label);
-        // user-defined label
-        if(token_ptr_u.type==Null){
-            Token ut={
-                .id=tok_label,
-                .value=string_cpToCharPtr(label)
-            };
-            switch(*label.ptr){
-                case '0': case '1': case '2': case '3': case '4':
-                case '5': case '6': case '7': case '8': case '9':
-                    ut.id=tok_number;
-                    break;
-                default:
-                    break;
-            }
-            Autoarr_add(tokens, ut);
-        }
-        // built-in keyword
-        else Autoarr_add(tokens, (*token_ptr_u.VoidPtr));
-        label={source,0};
-    };
-    addtok(TokenId id){
-        Autoarr_add(tokens, default_tokens[id]);
-    }
-    addtok_ifnext(char next, TokenId yes, TokenId no){
-        if(*(source+1)==next){
-            addtok(yes);
-            source++;
-        }
-        else addtok(no);
-    }
-    while ((c=*source++)) switch(c){
-        case ' ': case '\t':
-        case '\r': case '\n':
-            addlabel();
-            break;
-        case '(':
-            addlabel();
-            addtok(tok_lbracket);
-            break;
-        case '{':
-            addlabel();
-            addtok(tok_lbracket_fi);
-            break;
-        case '[':
-            addlabel();
-            addtok(tok_lbracket_sq);
-            break;
-        case ')':
-            addlabel();
-            addtok(tok_rbracket);
-            break;
-        case '}':
-            addlabel();
-            addtok(tok_rbracket_fi);
-            break;
-        case ']':
-            addlabel();
-            addtok(tok_rbracket_sq);
-            break;
-        case '\'':
-            addlabel();
-            Token ctok={
-                .id=tok_char ,
-                .value={++source,1}
-            };
-            if(*source=='\\'){
-                ctok.value=malloc(1);
-                *ctok.value=charFromEscapeStr(source++,2);
-            }
-            if(*(++source)!='\'')
-                throw_wrongchar();
-            Autoarr_add(tokens, ctok);
-            break;
-        case '"':
-            addlabel();
-            break;
-        case '<':
-            addlabel();
-            addtok(tok_less);
-            break;
-        case '>':
-            addlabel();
-            addtok(tok_more);
-            break;
-        case '+':
-            addlabel();
-            addtok(tok_plus);
-            break;
-        case '-':
-            addlabel();
-            addtok(tok_minus);
-            break;
-        case '*':
-            addlabel();
-            addtok(tok_asterisk);
-            break;
-        case '/':
-            addlabel();
-            if(*(++source)=='/'){
-                label={++source,0};
-                while((c=*(source++))
-                    if(c=='\n'){
-                        // removes \r from the end of comment line
-                        if(*(source-2)=='\r')
-                            label.length--;
-                        goto add_comment;
-                    }
-                    else label.length++;
-                add_comment:
-                Token comt={
-                    .id=tok_comment,
-                    .value=label
-                }
-                Autoarr_add(tokens,comt);
-            }
-            else if(*source=='*'){
-                label={++source,0};
-                while((c=*(source++))
-                    if(c=='*'&&*(source++)=='/')
-                        goto add_comment;
-                    else label.length++;
-                add_comment:
-                Token comt={
-                    .id=tok_comment,
-                    .value=label
-                }
-                Autoarr_add(tokens,comt);
-            }
-            else{
-                source--;
-                addtok(tok_slash);
-            }
-            break;
-        case '=':
-            addlabel();
-            addtok_ifnext('=',tok_equals,tok_assign);
-            break;
-        case '!':
-            addlabel();
-            addtok_ifnext('=',tok_not_equals,tok_not);
-            break;
-        case '&':
-            addlabel();
-            addtok_ifnext('&',tok_,tok_);
-            break;
-        case '|':
-            addlabel();
-            addtok_ifnext('|',tok_or,tok_or_d);
-            break;
-        case '?':
-            addlabel();
-            addtok_ifnext('?',tok_question,tok_question_d);
-            break;
-        case ':':
-            addlabel();
-            addtok(tok_colon);
-            break;
-        case ';':
-            addlabel();
-            addtok(tok_semicolon);
-            break;
-        case '.':
-            addlabel();
-            addtok(tok_point);
-            break;
-        case ',':
-            addlabel();
-            addtok(tok_comma);
-            break;
-        case '~':
-            addlabel();
-            addtok(tok_tilda);
-            break;
-        case '\\':
-            addlabel();
-            addtok(tok_backslash);
-            break;
-        case '%':
-            addlabel();
-            addtok(tok_percent);
-            break;
-        case '^':
-            addlabel();
-            addtok(tok_xor);
-            break;
-        case '$':
-            addlabel();
-            addtok(tok_dollar);
-            break;
-        case '@':
-            addlabel();
-            addtok(tok_at);
-            break;
-        default:
-            label.length++;
-            break;
-    }
-    return tokens;
-}
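Note on the deletion above: the old _lexan relied on GCC nested functions (addlabel, addtok and addtok_ifnext close over sfi), a non-standard extension, and its comment handling reused the add_comment label twice in one function, which does not compile. The rewrite hoists every helper to file scope and threads state through an explicit SharedLexerData* parameter, hidden again by a macro at call sites. The same refactor pattern in miniature, with hypothetical names:

#include <stdio.h>

typedef struct Ctx{ int count; } Ctx;

// before: a nested void bump(){ ctx.count++; } captured ctx implicitly (GCC-only);
// after: the context is an explicit parameter and a macro restores the short call
static void _bump(Ctx* ctx){ ctx->count++; }
#define bump() _bump(ctx)

int main(){
    Ctx c={0};
    Ctx* ctx=&c; // the macro expects a local named ctx, like sld in lexer.c
    bump();
    bump();
    printf("%d\n", c.count); // prints 2
    return 0;
}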

src/lexer/lexer.h

@@ -1,9 +1,6 @@
 #pragma once
-#include "../../../kerep/src/Autoarr/Autoarr.h"
-#include "../../../kerep/src/String/string.h"
 #include "tokens.h"
 
-Autoarr_declare(Token)
-
-Autoarr(Token)* lexan(char* source, char* filename);
+//Autoarr(Token)*
+Maybe lexan(char* source, char* filename);

src/lexer/my_type_ext.h Normal file

@@ -0,0 +1,7 @@
+#pragma once
+
+#include "../../../kerep/src/base/base.h"
+
+typedef enum lexer_type{
+    AutoarrTokenPtr=my_type_last+1
+} lexer_type;
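Note: this header extends kerep's Unitype type-tag enum past my_type_last so that lexan can return an Autoarr(Token)* inside a Maybe (see SUCCESS(UniPtr(AutoarrTokenPtr,tokens)) in lexer.c). A sketch of caller-side dispatch, under the assumption that Unitype exposes its tag as a .type field, as the deleted token_ptr_u.type==Null check suggests:

#include "../../../kerep/src/base/base.h"
#include "my_type_ext.h"
#include "tokens.h"

// hypothetical helper, not part of the diff: recover the token array
// only when the Unitype really carries the lexer's custom tag
static Autoarr(Token)* as_token_array(Unitype u){
    return u.type==AutoarrTokenPtr ? (Autoarr(Token)*)u.VoidPtr : NULL;
}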

src/lexer/tokens.c Normal file

@@ -0,0 +1,12 @@
+#include "tokens.h"
+
+Autoarr_define(Token)
+
+void init_keywordsSearchTree(){
+    keywordsSearchTree=STNode_create();
+    for(TokenId keywordId=0; keywordId<=tok_typeof; keywordId++){
+        Token keyword=default_tokens[keywordId];
+        Unitype fake_uni=*(Unitype*)(void*)&keyword;
+        ST_push(keywordsSearchTree, keyword.value, fake_uni);
+    }
+}
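Note: ST_push here and ST_pullString in lexer.c smuggle a whole Token through the tree's Unitype slot via pointer-cast punning (*(Unitype*)(void*)&keyword). That round trip is only sound while a Token fits inside a Unitype; a sketch of a compile-time guard making the layout assumption explicit (not in the diff):

#include "tokens.h"

// pin down the size assumption the Token<->Unitype punning relies on
_Static_assert(sizeof(Token)<=sizeof(Unitype),
    "Token must fit inside Unitype for the SearchTree punning");

void punning_demo(void){
    // round trip, mirroring init_keywordsSearchTree and _tryAddLabel
    Token keyword=default_tokens[tok_struct];
    Unitype fake_uni=*(Unitype*)(void*)&keyword; // Token -> Unitype
    Token back=*(Token*)(void*)&fake_uni;        // Unitype -> Token
    (void)back; // back.id==tok_struct, back.value=="struct"
}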

src/lexer/tokens.h

@@ -1,6 +1,8 @@
 #pragma once
-#include "../../../kerep/src/base/base.h"
+#include "../../../kerep/src/Autoarr/Autoarr.h"
+#include "../../../kerep/src/SearchTree/SearchTree.h"
+#include "my_type_ext.h"
 
 typedef enum TokenId{
     // base types
@@ -35,6 +37,7 @@ typedef enum TokenId{
     tok_return,
     tok_goto,
     // declaration keywords
+    tok_class,
     tok_struct,
     tok_enum,
     tok_union,
@@ -55,12 +58,6 @@ typedef enum TokenId{
     tok_new,    // allocates struct in heap
     tok_sizeof, // size of variable value
     tok_typeof, // type of variable
-    // user-defined
-    tok_label,
-    tok_number,
-    tok_character,
-    tok_string,
-    tok_comment,
     // special characters
     tok_lbracket,    // (
     tok_lbracket_fi, // {
@@ -68,8 +65,6 @@ typedef enum TokenId{
     tok_rbracket,    // )
     tok_rbracket_fi, // }
     tok_rbracket_sq, // ]
-    //tok_quot,   // '
-    //tok_quot_d, // "
     tok_less, // <
     tok_more, // >
     tok_plus, // +
@@ -96,7 +91,13 @@ typedef enum TokenId{
     tok_xor,     // ^
     tok_lattice, // #
     tok_dollar,  // $
-    tok_at       // @
+    tok_at,      // @
+    // user-defined
+    tok_label,
+    tok_number,
+    tok_character,
+    tok_string,
+    tok_comment
 } __attribute__((__packed__)) TokenId;
 
 typedef struct Token{
@@ -104,6 +105,11 @@ typedef struct Token{
     TokenId id;
 } Token;
 
+static STNode* keywordsSearchTree;
+// dont forget to free it
+void init_keywordsSearchTree();
+
 static const Token default_tokens[]={
     // base types
     {"void", tok_void_t},
@@ -137,6 +143,7 @@ static const Token default_tokens[]={
     {"return", tok_return},
     {"goto", tok_goto},
     // declaration keywords
+    {"class", tok_class},
     {"struct", tok_struct},
     {"enum", tok_enum},
     {"union", tok_union},
@@ -192,3 +199,5 @@ static const Token default_tokens[]={
     {"$", tok_dollar},
     {"@", tok_at}
 };
+
+Autoarr_declare(Token)
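Note: init_keywordsSearchTree indexes default_tokens[keywordId] for every id up to tok_typeof, so the table must stay ordered exactly like the enum over the keyword range; this commit keeps that invariant by adding {"class", tok_class} at the same position tok_class takes in the enum. A cheap guard for the invariant (sketch, not in the diff):

#include <assert.h>
#include "tokens.h"

// verify table slot i really holds the token whose id is i,
// for every id the keyword-tree construction indexes directly
void check_keyword_table(void){
    for(TokenId id=0; id<=tok_typeof; id++)
        assert(default_tokens[id].id==id);
}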