From 042e3e6670949c2d75f5600749690e152c2af7a7 Mon Sep 17 00:00:00 2001
From: Timerix22 <timerix11@gmail.com>
Date: Thu, 24 Mar 2022 15:24:46 +0300
Subject: [PATCH] 3

---
 README.md        |   2 +-
 cobek_compiler.h |  13 +++
 commands.c       |   4 +
 commands.h       |  16 ++++
 context.c        |   3 +
 context.h        |  14 +++
 lexer.c          | 227 +++++++++++++++++++++++++++++++++++++++++++++--
 parser.c         |   5 ++
 tokens.h         |  48 ++++++----
 9 files changed, 306 insertions(+), 26 deletions(-)
 create mode 100644 cobek_compiler.h
 create mode 100644 commands.c
 create mode 100644 commands.h
 create mode 100644 context.c
 create mode 100644 context.h
 create mode 100644 parser.c

diff --git a/README.md b/README.md
index 069b659..60f3b17 100644
--- a/README.md
+++ b/README.md
@@ -1 +1 @@
-## Cobek
+## cobek
diff --git a/cobek_compiler.h b/cobek_compiler.h
new file mode 100644
index 0000000..56a3483
--- /dev/null
+++ b/cobek_compiler.h
@@ -0,0 +1,13 @@
+#pragma once
+#include "tokens.h"
+#include "context.h"
+
+Autoarr_declare(Token);
+// Lexer (lexer.c): splits `source` into tokens; `filename` is only used in lexer error messages.
+Autoarr(Token)* lexan(char* source, char* filename);
+// Parser (parser.c): NOTE(review) presumably builds the top-level Context from the tokens; not implemented in this patch.
+Context* parse(Autoarr(Token)* tokens);
+// NOTE(review): not defined in this patch; presumably turns a top-level context into bytecode - confirm.
+string genBytecode(Context* toplvl_context);
+// NOTE(review): not defined in this patch; Assembly and AssemblyParams are not declared in the headers shown here.
+Assembly genAssembly(Autoarr(string)* compiled_contexts, AssemblyParams params);
diff --git a/commands.c b/commands.c
new file mode 100644
index 0000000..4f70635
--- /dev/null
+++ b/commands.c
@@ -0,0 +1,4 @@
+// Autoarr(Command) definitions for the declaration in commands.h (a .c file needs no #pragma once).
+#include "commands.h"
+
+Autoarr_define(Command);
diff --git a/commands.h b/commands.h
new file mode 100644
index 0000000..ad22f08
--- /dev/null
+++ b/commands.h
@@ -0,0 +1,16 @@
+#pragma once
+#include "../kerep/Autoarr/Autoarr.h"
+
+typedef enum CommandId{
+    cmd_none // placeholder: C does not allow an empty enumerator list
+} __attribute__((__packed__)) CommandId;
+
+// Command is a node of a doubly linked chain (next/prev) tagged with a CommandId.
+typedef struct CommandStruct Command;
+struct CommandStruct{
+    Command* next;
+    Command* prev;
+    CommandId id;
+};
+
+Autoarr_declare(Command);
\ No newline at end of file
diff --git a/context.c b/context.c
new file mode 100644
index 0000000..2cf17e9
--- /dev/null
+++ b/context.c
@@ -0,0 +1,3 @@
+#include "context.h"
+// NOTE(review): presumably emits the Autoarr(Context) definitions matching Autoarr_declare(Context) in context.h - confirm against kerep.
+Autoarr_define(Context);
diff --git a/context.h b/context.h
new file mode 100644
index 0000000..b7ccf46
--- /dev/null
+++ b/context.h
@@ -0,0 +1,14 @@
+#pragma once
+#include "commands.h"
+
+// Context is a tree node: it points to its parent and holds child contexts, a command chain and a constant stack.
+typedef struct ContextStruct Context;
+
+Autoarr_declare(Context);
+
+struct ContextStruct{
+    Context* parent;
+    Autoarr(Context) children;
+    Autoarr(Command) commandChain;
+    Autoarr(Constant) constantStack; // NOTE(review): Constant / Autoarr(Constant) is not declared in this patch
+};
diff --git a/lexer.c b/lexer.c
index 42d6d21..711add7 100644
--- a/lexer.c
+++ b/lexer.c
@@ -1,24 +1,80 @@
+#include "cobek_compiler.h"
 
-Autoarr(Token)* lexan(char* source){
+Autoarr_define(Token); // NOTE(review): presumably the definitions for Autoarr_declare(Token) in cobek_compiler.h - confirm against kerep
+// Returns the char denoted by a two-char escape sequence (backslash + letter); throws on anything else.
+char charFromEscapeStr(string estr){
+    if(estr.length<2) throw(ERR_WRONGCHAR); // the char after the backslash must exist
+    if(estr.ptr[0]!='\\') throw("first char is not \\");
+    switch(estr.ptr[1]){
+        case 'n': return '\n';
+        case 'r': return '\r';
+        case 't': return '\t';
+        case 'e': return '\x1b'; // ESC; '\e' is a compiler extension, not standard C
+        case '\\': return '\\';
+        case '"': return '"';
+        case '\'': return '\'';
+        default: throw(ERR_WRONGCHAR);
+    }
+}
+
+
+Autoarr(Token)* lexan(char* source, char* filename){ // splits source into tokens; filename is only used in error messages
     Autoarr(Token)* tokens=malloc(sizeof(Autoarr(Token)));
     *tokens=Autoarr_create(Token,64,1024);
     char c;
     string label={source,0};
+    string line={source,0};
+    uint32 linenum=1, cnum=0;
+    // Prints the lexer cursor position (file:line:column) and the text of that line, then exits with code 96.
+    void throw_wrongchar(){
+        for(char* p=line.ptr; p<source && *p!='\0'; p++) // line.ptr still points to the start of the text
+            if(*p=='\n'){ linenum++; cnum=0; line.ptr=p+1; } else cnum++;
+        while(line.ptr[line.length]!='\0' && line.ptr[line.length]!='\n') line.length++;
+        char* errline=string_cpToCharPtr(line);
+        printf("\n\e[91mwrong char <%c> at [%s:%u:%u]\n >>> %s\n", c, filename, linenum, cnum, errline);
+        exit(96);
+    };
     
     void addlabel(){
-        if(label.length!=0) return;
-        Unitype token=ST_pull_str(label);
-        if(token.type==Null){
-            //custom token
+        // Flushes the pending label as a keyword, number or user label token and restarts the label at source.
+        if(label.length==0){ label.ptr=source; return; } // keep the start in sync even when nothing is pending
+        Unitype token_ptr_u=ST_pull_str(label);
+        // user-defined label
+        if(token_ptr_u.type==Null){
+            Token ut={
+                .id=tok_label,
+                .value=string_cpToCharPtr(label)
+            };
+            switch(*label.ptr){
+                case '0': case '1': case '2': case '3': case '4':
+                case '5': case '6': case '7': case '8': case '9':
+                    ut.id=tok_number;
+                    break;
+                default:
+                    break;
+            }
+            Autoarr_add(tokens, ut);
         }
-        Autoarr_add(tokens, (*token.VoidPtr));
+        // built-in keyword: VoidPtr is expected to point to its Token (a void* cannot be dereferenced directly)
+        else Autoarr_add(tokens, *(Token*)token_ptr_u.VoidPtr);
-        label={source,0};
+        label=(string){source,0};
     };
     
+    // Appends the predefined token default_tokens[id] to the result array.
-    addtok(TokenId id){
+    void addtok(TokenId id){
        Autoarr_add(tokens, default_tokens[id]);
     }
     
+    // If the char after the current one equals `next`, consumes it and adds token `yes`; otherwise adds `no`.
+    void addtok_ifnext(char next, TokenId yes, TokenId no){
+        // the main loop has already advanced source past the current char, so *source is the next one
+        if(*source==next){
+            addtok(yes);
+            source++;
+        }
+        else addtok(no);
+    }
+    
     while ((c=*source++)) switch(c){
         case ' ': case '\t':
         case '\r': case '\n':
@@ -28,7 +84,183 @@ Autoarr(Token)* lexan(char* source){
             addlabel();
             addtok(tok_lbracket);
             break;
+        case '{':
+            addlabel();
+            addtok(tok_lbracket_fi);
+            break;
+        case '[':
+            addlabel();
+            addtok(tok_lbracket_sq);
+            break;
+        case ')':
+            addlabel();
+            addtok(tok_rbracket);
+            break;
+        case '}':
+            addlabel();
+            addtok(tok_rbracket_fi);
+            break;
+        case ']':
+            addlabel();
+            addtok(tok_rbracket_sq);
+            break;
+        case '\'': {
+            addlabel();
+            // char literal: one char or a two-char escape sequence followed by the closing quote
+            char ch=*source;
+            if(ch=='\\'){
+                ch=charFromEscapeStr((string){source,2});
+                source++;
+            }
+            if(*source!='\0') source++;
+            if(*source!='\'')
+                throw_wrongchar();
+            source++; // skip the closing quote
+            Token ctok={ .id=tok_char, .value=string_cpToCharPtr((string){&ch,1}) };
+            Autoarr_add(tokens, ctok);
+            break;
+        }
+        case '"': {
+            addlabel();
+            // string literal: the raw text up to the closing quote; escape sequences are stored unprocessed
+            string str={source,0};
+            while(*source!='"'){
+                if(*source=='\0') throw_wrongchar();
+                // a backslash keeps the following char (for example an escaped quote) inside the literal
+                if(*source=='\\' && source[1]!='\0'){ source++; str.length++; }
+                source++;
+                str.length++;
+            }
+            Token stok={ .id=tok_string, .value=string_cpToCharPtr(str) };
+            Autoarr_add(tokens, stok);
+            source++; // skip the closing quote
             
             
+            break;
+        }
+        case '<':
+            addlabel();
+            addtok(tok_less);
+            break;
+        case '>':
+            addlabel();
+            addtok(tok_more);
+            break;
+        case '+':
+            addlabel();
+            addtok(tok_plus);
+            break;
+        case '-':
+            addlabel();
+            addtok(tok_minus);
+            break;
+        case '*':
+            addlabel();
+            addtok(tok_asterisk);
+            break;
+        case '/':
+            addlabel();
+            if(*source=='/'){
+                // single-line comment: the text up to the end of the line (the newline is left to the main loop)
+                label=(string){++source,0};
+                while(*source!='\0' && *source!='\n'){
+                    source++;
+                    label.length++;
+                }
+                // removes \r from the end of comment line
+                if(label.length!=0 && *(source-1)=='\r')
+                    label.length--;
+                Token comt={
+                    .id=tok_comment,
+                    .value=string_cpToCharPtr(label)
+                };
+                Autoarr_add(tokens,comt);
+                label=(string){source,0};
+            }
+            else if(*source=='*'){
+                // multi-line comment: the text up to the closing */ (or to the end of the source)
+                label=(string){++source,0};
+                while(*source!='\0' && !(source[0]=='*' && source[1]=='/')){
+                    source++;
+                    label.length++;
+                }
+                Token comt={
+                    .id=tok_comment,
+                    .value=string_cpToCharPtr(label)
+                };
+                Autoarr_add(tokens,comt);
+                if(*source!='\0') source+=2; // skip the closing */
+                label=(string){source,0};
+            }
+            else addtok(tok_slash);
+            break;
+        case '=':
+            addlabel();
+            addtok_ifnext('=',tok_equals,tok_assign);
+            break;
+        case '!':
+            addlabel();
+            addtok_ifnext('=',tok_not_equals,tok_not);
+            break;
+        case '&':
+            addlabel();
+            addtok_ifnext('&',tok_and_d,tok_and);
+            break;
+        case '|':
+            addlabel();
+            addtok_ifnext('|',tok_or_d,tok_or);
+            break;
+        case '?':
+            addlabel();
+            addtok_ifnext('?',tok_question_d,tok_question);
+            break;
+        case ':':
+            addlabel();
+            addtok(tok_colon);
+            break;
+        case ';':
+            addlabel();
+            addtok(tok_semicolon);
+            break;
+        case '.':
+            addlabel();
+            addtok(tok_point);
+            break;
+        case ',':
+            addlabel();
+            addtok(tok_comma);
+            break;
+        case '~':
+            addlabel();
+            addtok(tok_tilda);
+            break;
+        case '\\':
+            addlabel();
+            addtok(tok_backslash);
+            break;
+        case '%':
+            addlabel();
+            addtok(tok_percent);
+            break;
+        case '^':
+            addlabel();
+            addtok(tok_xor);
+            break;
+        case '$':
+            addlabel();
+            addtok(tok_dollar);
+            break;
+        case '@':
+            addlabel();
+            addtok(tok_at);
+            break;
+        default:
+            // first char of a new label: anchor it here, because multi-char tokens move source after addlabel()
+            if(label.length==0) label.ptr=source-1;
+            label.length++;
+            break;
     }
+    // flush a label that runs up to the end of the source
+    addlabel();
+    return tokens;
 }
diff --git a/parser.c b/parser.c
new file mode 100644
index 0000000..79e993b
--- /dev/null
+++ b/parser.c
@@ -0,0 +1,5 @@
+#include "cobek_compiler.h"
+
+Context* parse(Autoarr(Token)* tokens){ // return type matches the prototype in cobek_compiler.h
+    return NULL; // the parser is not implemented yet
+}
diff --git a/tokens.h b/tokens.h
index 1a9b6e2..b40d1a6 100644
--- a/tokens.h
+++ b/tokens.h
@@ -1,4 +1,5 @@
-#include ""
+#pragma once
+#include "../kerep/base/std.h"
 
 typedef enum TokenId{
     // base types
@@ -23,63 +24,72 @@ typedef enum TokenId{
     tok_false,
     // control flow operators
     tok_if,
+    tok_else,
     tok_for,
     tok_while,
     tok_switch,
     tok_case,
     tok_brake,
+    tok_braketo,
     tok_return,
+    tok_goto,
     // declaration keywords
     tok_struct,
     tok_enum,
     tok_union,
-    tok_alias,
     tok_event,
     tok_import,
-    // access modiiers
+    tok_alias,
+    // access modifiers
     tok_const,
     tok_protected,
     tok_public,
     tok_virtual,
-    // cpecial characters
+    // user-defined
+    tok_label,
+    tok_number,
+    tok_char,
+    tok_string,
+    tok_comment,
+    // special characters
     tok_lbracket,    // (
     tok_lbracket_fi, // {
     tok_lbracket_sq, // [
     tok_rbracket,    // )
     tok_rbracket_fi, // }
     tok_rbracket_sq, // ]
-    tok_quot,        // '
-    tok_quot_d,      // "
+    //tok_quot,        // '
+    //tok_quot_d,      // "
     tok_less,        // <
     tok_more,        // >
     tok_plus,        // +
     tok_minus,       // -
-    tok_multip,      // *
-    tok_divide,      // /
+    tok_asterisk,    // *
+    tok_slash,       // /
     tok_assign,      // =
-    tok_colon,       // :
-    tok_semicolon,   // ;
-    tok_point,       // .
-    tok_comma,       // ,
-    tok_question,    // ?
-    tok_question_d,  // ??
+    tok_not,         // !
+    tok_equals,      // ==
+    tok_not_equals,  // !=
     tok_and,         // &
     tok_and_d,       // &&
     tok_or,          // |
     tok_or_d,        // ||
-    tok_not,         // !
-    tok_equals,      // ==
-    tok_not_equals,  // !=
+    tok_question,    // ?
+    tok_question_d,  // ??
+    tok_colon,       // :
+    tok_semicolon,   // ;
+    tok_point,       // .
+    tok_comma,       // ,
     tok_tilda,       // ~
     tok_backslash,   // \
     tok_percent,     // %
     tok_xor,         // ^
     tok_lattice,     // #
     tok_dollar,      // $
-    tok_
+    tok_at           // @
 } __attribute__((__packed__)) TokenId;
 
 typedef struct Token{
+    char* value; // token text; for user-defined labels the lexer stores a string_cpToCharPtr copy here
     TokenId id;
-    Unitype value;
 } Token;