3

2022-03-24 15:24:46 +03:00 · 2022-03-24 15:24:46 +03:00 · 042e3e6670
commit 042e3e6670
parent e33e5eb751
9 changed files with 306 additions and 26 deletions
--- a/README.md
+++ b/README.md
@ -1 +1 @@
-## Cobek
+## cobek
--- a/cobek_compiler.h
+++ b/cobek_compiler.h
@ -0,0 +1,13 @@
+#pragma once
+#include "tokens.h"
+#include "context.h"
+
+Autoarr_declare(Token); // the matching Autoarr_define(Token) is in lexer.c
+// lexer stage (lexer.c): turns source text into a token array; filename is only used in lexer error messages
+Autoarr(Token)* lexan(char* source, char* filename);
+// parser stage: parser.c contains only an empty stub so far
+Context* parse(Autoarr(Token)* tokens);
+// NOTE(review): no definition of genBytecode is visible in this commit
+string genBytecode(Context* toplvl_context);
+// NOTE(review): no definition of genAssembly is visible; Assembly and AssemblyParams are not declared in any hunk shown here
+Assembly genAssembly(Autoarr(string)* compiled_contexts, AssemblyParams params);
--- a/commands.c
+++ b/commands.c
@ -0,0 +1,4 @@
+// Translation unit for the Command array; #pragma once was dropped because it is only meaningful in headers
+#include "commands.h"
+
+Autoarr_define(Command);
--- a/commands.h
+++ b/commands.h
@ -0,0 +1,16 @@
+#pragma once
+#include "../kerep/Autoarr/Autoarr.h"
+
+typedef enum CommandId{
+    // NOTE(review): no command ids are defined yet; ISO C requires at least one enumerator here
+} __attribute__((__packed__)) CommandId;
+
+struct CommandStruct;
+typedef struct CommandStruct Command; // C needs the struct keyword here: a bare tag is not a type name
+struct CommandStruct{ // one command; next/prev let commands be linked in both directions
+    Command* next;
+    Command* prev;
+    CommandId id;
+};
+
+Autoarr_declare(Command); // must name the Command typedef: context.h uses Autoarr(Command)
--- a/context.c
+++ b/context.c
@ -0,0 +1,3 @@
+#include "context.h"
+
+Autoarr_define(Context); // pairs with Autoarr_declare(Context) in context.h
--- a/context.h
+++ b/context.h
@ -0,0 +1,14 @@
+#pragma once
+#include "commands.h"
+
+struct ContextStruct;
+typedef struct ContextStruct Context; // C needs the struct keyword here: a bare tag is not a type name
+
+Autoarr_declare(Context);
+
+struct ContextStruct{ // node of the context tree: parent link plus an array of child contexts
+    Context* parent;
+    Autoarr(Context) children;
+    Autoarr(Command) commandChain;
+    Autoarr(Constant) constantStack; // NOTE(review): no Constant type or Autoarr_declare(Constant) is visible - confirm
+};
--- a/lexer.c
+++ b/lexer.c
@ -1,24 +1,80 @@
+#include "cobek_compiler.h"

-Autoarr(Token)* lexan(char* source){
+Autoarr_define(Token);
+
+
+char charFromEscapeStr(string estr){ // maps a two-char escape sequence (backslash + selector) to its character
+    const char* seq=estr.ptr;
+    if(seq[0]!='\\') throw("first char is not \\");
+    const char code=seq[1]; // the char right after the backslash selects the result
+    if(code=='n') return '\n';
+    if(code=='r') return '\r';
+    if(code=='t') return '\t';
+    if(code=='e') return '\e';
+    // backslash and both quote characters escape to themselves
+    if(code=='\\' || code=='"' || code=='\'')
+        return code;
+    throw(ERR_WRONGCHAR);
+}
+
+
+Autoarr(Token)* lexan(char* source, char* filename){
    Autoarr(Token)* tokens=malloc(sizeof(Autoarr(Token)));
    *tokens=Autoarr_create(Token,64,1024);
    char c;
    string label={source,0};
+    string line={source,0};
+    uint32 linenum;
+    uint32 cnum;
+    
+    
+    void throw_wrongchar(){ // prints the offending char c with its file position, then exits with code 96
+        char* errline=string_cpToCharPtr(line); // NOTE(review): line, linenum and cnum are not updated in the visible hunks
+        printf("\n\e[91mwrong char <%c> at [%s:%u:%u]\n >>> %s\n", c,filename,linenum,cnum,errline); // one argument per conversion, in format order
+        exit(96);
+    };
+    
    
    void addlabel(){
-        if(label.length!=0) return;
-        Unitype token=ST_pull_str(label);
-        if(token.type==Null){
-            //custom token
+        if(label.length==0){ label.ptr=source; return; } // nothing collected: still restart the label at the current position
+        Unitype token_ptr_u=ST_pull_str(label);
+        // user-defined label
+        if(token_ptr_u.type==Null){
+            Token ut={
+                .id=tok_label,
+                .value=string_cpToCharPtr(label)
+            };
+            switch(*label.ptr){ // a leading digit marks the label as a number
+                case '0': case '1': case '2': case '3': case '4':
+                case '5': case '6': case '7': case '8': case '9': 
+                    ut.id=tok_number;
+                    break;
+                default:
+                    break;
+            }
+            Autoarr_add(tokens, ut);
        }
-        Autoarr_add(tokens, (*token.VoidPtr));
+        // built-in keyword
+        else Autoarr_add(tokens, (*token_ptr_u.VoidPtr)); // NOTE(review): if VoidPtr is a void*, it needs a cast to Token* before the dereference
+        
        label={source,0};
    };
    
+    
    addtok(TokenId id){
       Autoarr_add(tokens, default_tokens[id]);
    }
    
+    
+    void addtok_ifnext(char next, TokenId yes, TokenId no){ // adds yes and consumes one more char if it equals next, otherwise adds no
+        if(*source==next){ // the main loop has already advanced source past the current char, so *source is the lookahead
+             addtok(yes);
+             source++;
+        }
+        else addtok(no);
+    }
+    
+    
    while ((c=*source++)) switch(c){
        case ' ': case '\t':
        case '\r': case '\n':
@ -28,7 +84,166 @@ Autoarr(Token)* lexan(char* source){
            addlabel();
            addtok(tok_lbracket);
            break;
+        case '{':
+            addlabel();
+            addtok(tok_lbracket_fi);
+            break;
+        case '[':
+            addlabel();
+            addtok(tok_lbracket_sq);
+            break;
+        case ')':
+            addlabel();
+            addtok(tok_rbracket);
+            break;
+        case '}':
+            addlabel();
+            addtok(tok_rbracket_fi);
+            break;
+        case ']':
+            addlabel();
+            addtok(tok_rbracket_sq);
+            break;
+        case '\'':
+            addlabel();
+            // source already points at the first char after the opening quote
+            Token ctok={ .id=tok_char, .value=malloc(2) }; // one decoded char plus terminating zero
+            if(*source=='\\'){
+                ctok.value[0]=charFromEscapeStr((string){source,2}); // backslash + selector char
+                source+=2;
+            }
+            else ctok.value[0]=(*source ? *source++ : '\0'); // at end of input consume nothing; the quote check below then fails
+            ctok.value[1]='\0';
+            if((c=*source++)!='\'') // c is what throw_wrongchar reports
+                throw_wrongchar();
+            Autoarr_add(tokens, ctok);
+            break;
+        case '"':
+            addlabel(); // TODO: string literals are not lexed yet; only the pending label is flushed and the quote is dropped
            
            
+            break;
+        case '<':
+            addlabel();
+            addtok(tok_less);
+            break;
+        case '>':
+            addlabel();
+            addtok(tok_more);
+            break;
+        case '+':
+            addlabel();
+            addtok(tok_plus);
+            break;
+        case '-':
+            addlabel();
+            addtok(tok_minus);
+            break;
+        case '*':
+            addlabel();
+            addtok(tok_asterisk);
+            break;
+        case '/':
+            addlabel();
+            // the loop header already moved source past '/', so *source is the lookahead char
+            if(*source=='/'){ // line comment: text up to the end of the line or the end of input
+                label=(string){++source,0};
+                while(*source!='\0' && *source!='\n'){
+                    source++;
+                    label.length++;
+                }
+                // removes \r from the end of comment line
+                if(label.length!=0 && *(source-1)=='\r')
+                    label.length--;
+                Token comt={
+                    .id=tok_comment,
+                    .value=string_cpToCharPtr(label) // Token.value is a char*, so the text is copied out
+                };
+                Autoarr_add(tokens,comt);
+                if(*source=='\n') source++; // consume the line break, but never step over the terminating zero
+                label=(string){source,0};
+            }
+            else if(*source=='*'){
+                // block comment: text up to the closing */ or the end of input
+                label=(string){++source,0};
+                while(*source!='\0' && !(source[0]=='*' && source[1]=='/')){
+                    source++;
+                    label.length++;
+                }
+                Token comt={
+                    .id=tok_comment,
+                    .value=string_cpToCharPtr(label)
+                };
+                Autoarr_add(tokens,comt);
+                if(*source!='\0') source+=2; // skip the closing */ unless input ended first
+                label=(string){source,0};
+            }
+            else addtok(tok_slash);
+            break;
+        case '=':
+            addlabel();
+            addtok_ifnext('=',tok_equals,tok_assign);
+            break;
+        case '!':
+            addlabel();
+            addtok_ifnext('=',tok_not_equals,tok_not);
+            break;
+        case '&':
+            addlabel();
+            addtok_ifnext('&',tok_and_d,tok_and); // && -> tok_and_d, single & -> tok_and
+            break;
+        case '|':
+            addlabel();
+            addtok_ifnext('|',tok_or_d,tok_or); // || -> tok_or_d, single | -> tok_or
+            break;
+        case '?':
+            addlabel();
+            addtok_ifnext('?',tok_question_d,tok_question); // ?? -> tok_question_d, single ? -> tok_question
+            break;
+        case ':':
+            addlabel();
+            addtok(tok_colon);
+            break;
+        case ';':
+            addlabel();
+            addtok(tok_semicolon);
+            break;
+        case '.':
+            addlabel();
+            addtok(tok_point);
+            break;
+        case ',':
+            addlabel();
+            addtok(tok_comma);
+            break;
+        case '~':
+            addlabel();
+            addtok(tok_tilda);
+            break;
+        case '\\':
+            addlabel();
+            addtok(tok_backslash);
+            break;
+        case '%':
+            addlabel();
+            addtok(tok_percent);
+            break;
+        case '^':
+            addlabel();
+            addtok(tok_xor);
+            break;
+        case '$':
+            addlabel();
+            addtok(tok_dollar);
+            break;
+        case '@':
+            addlabel();
+            addtok(tok_at);
+            break;
+        default:
+            label.length++;
+            break;
    }
+    
+    return tokens;
 }
--- a/parser.c
+++ b/parser.c
@ -0,0 +1,5 @@
+#include "cobek_compiler.h"
+
+Context* parse(Autoarr(Token)* tokens){ // returns a pointer, matching the declaration in cobek_compiler.h
+    return NULL; // parser is not implemented yet
+}
--- a/tokens.h
+++ b/tokens.h
@ -1,4 +1,5 @@
-#include ""
+#pragma once
+#include "../kerep/base/std.h"

 typedef enum TokenId{
    // base types
@ -23,63 +24,72 @@ typedef enum TokenId{
    tok_false,
    // control flow operators
    tok_if,
+    tok_else,
    tok_for,
    tok_while,
    tok_switch,
    tok_case,
    tok_brake,
+    tok_braketo,
    tok_return,
+    tok_goto,
    // declaration keywords
    tok_struct,
    tok_enum,
    tok_union,
-    tok_alias,
    tok_event,
    tok_import,
-    // access modiiers
+    tok_alias,
+    // access modifiers
    tok_const,
    tok_protected,
    tok_public,
    tok_virtual,
-    // cpecial characters
+    // user-defined
+    tok_label,
+    tok_number,
+    tok_char,
+    tok_string,
+    tok_comment,
+    // special characters
    tok_lbracket,    // (
    tok_lbracket_fi, // {
    tok_lbracket_sq, // [
    tok_rbracket,    // )
    tok_rbracket_fi, // }
    tok_rbracket_sq, // ]
-    tok_quot,        // '
-    tok_quot_d,      // "
+    //tok_quot,        // '
+    //tok_quot_d,      // "
    tok_less,        // <
    tok_more,        // >
    tok_plus,        // +
    tok_minus,       // -
-    tok_multip,      // *
-    tok_divide,      // /
+    tok_asterisk,    // *
+    tok_slash,       // /
    tok_assign,      // =
-    tok_colon,       // :
-    tok_semicolon,   // ;
-    tok_point,       // .
-    tok_comma,       // ,
-    tok_question,    // ?
-    tok_question_d,  // ??
+    tok_not,         // !
+    tok_equals,      // ==
+    tok_not_equals,  // !=
    tok_and,         // &
    tok_and_d,       // &&
    tok_or,          // |
    tok_or_d,        // ||
-    tok_not,         // !
-    tok_equals,      // ==
-    tok_not_equals,  // !=
+    tok_question,    // ?
+    tok_question_d,  // ??
+    tok_colon,       // :
+    tok_semicolon,   // ;
+    tok_point,       // .
+    tok_comma,       // ,
    tok_tilda,       // ~
    tok_backslash,   // \ (backslash)
    tok_percent,     // %
    tok_xor,         // ^
    tok_lattice,     // #
    tok_dollar,      // $
-    tok_
+    tok_at           // @
 } __attribute__((__packed__)) TokenId;

 typedef struct Token{
+    char* value; // token text; the lexer fills it for user-defined tokens (labels, numbers)
    TokenId id;
-    Unitype value;
 } Token;