From 042e3e6670949c2d75f5600749690e152c2af7a7 Mon Sep 17 00:00:00 2001 From: Timerix22 Date: Thu, 24 Mar 2022 15:24:46 +0300 Subject: [PATCH] 3 --- README.md | 2 +- cobek_compiler.h | 13 +++ commands.c | 4 + commands.h | 16 ++++ context.c | 3 + context.h | 14 +++ lexer.c | 227 +++++++++++++++++++++++++++++++++++++++++++++-- parser.c | 5 ++ tokens.h | 48 ++++++---- 9 files changed, 306 insertions(+), 26 deletions(-) create mode 100644 cobek_compiler.h create mode 100644 commands.c create mode 100644 commands.h create mode 100644 context.c create mode 100644 context.h create mode 100644 parser.c diff --git a/README.md b/README.md index 069b659..60f3b17 100644 --- a/README.md +++ b/README.md @@ -1 +1 @@ -## Cobek +## cobek diff --git a/cobek_compiler.h b/cobek_compiler.h new file mode 100644 index 0000000..56a3483 --- /dev/null +++ b/cobek_compiler.h @@ -0,0 +1,13 @@ +#pragma once +#include "tokens.h" +#include "context.h" + +Autoarr_declare(Token); + +Autoarr(Token)* lexan(char* source, char* filename); + +Context* parse(Autoarr(Token)* tokens); + +string genBytecode(Context* toplvl_context); + +Assembly genAssembly(Autoarr(string)* compiled_contexts, AssemblyParams params); diff --git a/commands.c b/commands.c new file mode 100644 index 0000000..4f70635 --- /dev/null +++ b/commands.c @@ -0,0 +1,4 @@ +#pragma once +#include "comands.h" + +Autoarr_define(Command); diff --git a/commands.h b/commands.h new file mode 100644 index 0000000..ad22f08 --- /dev/null +++ b/commands.h @@ -0,0 +1,16 @@ +#pragma once +#include "../kerep/Autoarr/Autoarr.h" + +typedef enum CommandId{ + +} __attribute__((__packed__)) CommandId; + +struct CommandStruct; +typedef CommandStruct Command; +struct CommandStruct{ + Command* next; + Command* prev; + CommandId id; +} + +Autoarr_declare(Comand); \ No newline at end of file diff --git a/context.c b/context.c new file mode 100644 index 0000000..2cf17e9 --- /dev/null +++ b/context.c @@ -0,0 +1,3 @@ +#include "context.h" + +Autoarr_define(Context); diff --git a/context.h b/context.h new file mode 100644 index 0000000..b7ccf46 --- /dev/null +++ b/context.h @@ -0,0 +1,14 @@ +#pragma once +#include "commands.h" + +struct ContextStruct; +typedef ContextStruct Context; + +Autoarr_declare(Context); + +struct ContextStruct{ + Context* parent; + Autoarr(Context) children; + Autoarr(Command) commandChain; + Autoarr(Constant) constantStack; +} diff --git a/lexer.c b/lexer.c index 42d6d21..711add7 100644 --- a/lexer.c +++ b/lexer.c @@ -1,24 +1,80 @@ +#include "cobek_compiler.h" -Autoarr(Token)* lexan(char* source){ +Autoarr_define(Token); + + +char charFromEscapeStr(string estr){ + if(*estr.ptr!='\\') throw("first char is not \\"); + switch(*(++estr.ptr)){ + case 'n': return '\n'; + case 'r': return '\r'; + case 't': return '\t'; + case 'e': return '\e'; + case '\\': return '\\'; + case '"': return '"'; + case '\'': return '\''; + default: throw(ERR_WRONGCHAR); + } +} + + +Autoarr(Token)* lexan(char* source, char* filename){ Autoarr(Token)* tokens=malloc(sizeof(Autoarr(Token))); *tokens=Autoarr_create(Token,64,1024); char c; string label={source,0}; + string line={source,0}; + uint32 linenum; + uint32 cnum; + + + void throw_wrongchar(){ + char* errline=string_cpToCharPtr(line); + printf("\n\e[91mwrong char <%c> at [%s:%u:%u %s]\n >>> %s\n" ,filename,linenum,cnum,c,errline); + exit(96); + }; + void addlabel(){ - if(label.length!=0) return; - Unitype token=ST_pull_str(label); - if(token.type==Null){ - //custom token + if(label.length==0) return; + Unitype token_ptr_u=ST_pull_str(label); + // user-defined label + if(token_ptr_u.type==Null){ + Token ut={ + .id=tok_label, + .value=string_cpToCharPtr(label) + }; + switch(*label.ptr){ + case '0': case '1': case '2': case '3': case '4': + case '5': case '6': case '7': case '8': case '9': + ut.id=tok_number; + break; + default: + break; + } + Autoarr_add(tokens, ut); } - Autoarr_add(tokens, (*token.VoidPtr)); + // built-in keyword + else Autoarr_add(tokens, (*token_ptr_u.VoidPtr)); + label={source,0}; }; + addtok(TokenId id){ Autoarr_add(tokens, default_tokens[id]); } + + addtok_ifnext(char next, TokenId yes, TokenId no){ + if(*(source+1)==next){ + addtok(yes); + source++; + } + else addtok(no); + } + + while ((c=*source++)) switch(c){ case ' ': case '\t': case '\r': case '\n': @@ -28,7 +84,166 @@ Autoarr(Token)* lexan(char* source){ addlabel(); addtok(tok_lbracket); break; + case '{': + addlabel(); + addtok(tok_lbracket_fi); + break; + case '[': + addlabel(); + addtok(tok_lbracket_sq); + break; + case ')': + addlabel(); + addtok(tok_rbracket); + break; + case '}': + addlabel(); + addtok(tok_rbracket_fi); + break; + case ']': + addlabel(); + addtok(tok_rbracket_sq); + break; + case '\'': + addlabel(); + Token ctok={ + .id=tok_char , + .value={++source,1} + }; + if(*source=='\\'){ + ctok.value=malloc(1); + *ctok.value=charFromEscapeStr(source++,2); + } + if(*(++source)!='\'') + throw_wrongchar(); + Autoarr_add(tokens, ctok); + break; + case '"': + addlabel(); + break; + case '<': + addlabel(); + addtok(tok_less); + break; + case '>': + addlabel(); + addtok(tok_more); + break; + case '+': + addlabel(); + addtok(tok_plus); + break; + case '-': + addlabel(); + addtok(tok_minus); + break; + case '*': + addlabel(); + addtok(tok_asterisk); + break; + case '/': + addlabel(); + if(*(++source)=='/'){ + label={++source,0}; + while((c=*(source++)) + if(c=='\n'){ + // removes \r from the end of comment line + if(*(source-2)=='\r') + label.length--; + goto add_comment; + } + else label.length++; + add_comment: + Token comt={ + .id=tok_comment, + .value=label + } + Autoarr_add(tokens,comt); + } + else if(*source=='*'){ + label={++source,0}; + while((c=*(source++)) + if(c=='*'&&*(source++)=='/') + goto add_comment; + else label.length++; + add_comment: + Token comt={ + .id=tok_comment, + .value=label + } + Autoarr_add(tokens,comt); + } + else{ + source--; + addtok(tok_slash); + } + break; + case '=': + addlabel(); + addtok_ifnext('=',tok_equals,tok_assign); + break; + case '!': + addlabel(); +addtok_ifnext('=',tok_not_equals,tok_not); + break; + case '&': + addlabel(); + addtok_ifnext('&',tok_,tok_); + break; + case '|': + addlabel(); + addtok_ifnext('|',tok_or,tok_or_d); + break; + case '?': + addlabel(); + addtok_ifnext('?',tok_question,tok_question_d); + break; + case ':': + addlabel(); + addtok(tok_colon); + break; + case ';': + addlabel(); + addtok(tok_semicolon); + break; + case '.': + addlabel(); + addtok(tok_point); + break; + case ',': + addlabel(); + addtok(tok_comma); + break; + case '~': + addlabel(); + addtok(tok_tilda); + break; + case '\\': + addlabel(); + addtok(tok_backslash); + break; + case '%': + addlabel(); + addtok(tok_percent); + break; + case '^': + addlabel(); + addtok(tok_xor); + break; + case '$': + addlabel(); + addtok(tok_dollar); + break; + case '@': + addlabel(); + addtok(tok_at); + break; + default: + label.length++; + break; } + + return tokens; } diff --git a/parser.c b/parser.c new file mode 100644 index 0000000..79e993b --- /dev/null +++ b/parser.c @@ -0,0 +1,5 @@ +#include "cobek_compiler.h" + +Context parse(Autoarr(Token)* tokens){ + +} diff --git a/tokens.h b/tokens.h index 1a9b6e2..b40d1a6 100644 --- a/tokens.h +++ b/tokens.h @@ -1,4 +1,5 @@ -#include "" +#pragma once +#include "../kerep/base/std.h" typedef enum TokenId{ // base types @@ -23,63 +24,72 @@ typedef enum TokenId{ tok_false, // control flow operators tok_if, + tok_else, tok_for, tok_while, tok_switch, tok_case, tok_brake, + tok_braketo, tok_return, + tok_goto, // declaration keywords tok_struct, tok_enum, tok_union, - tok_alias, tok_event, tok_import, - // access modiiers + tok_alias, + // access modifiers tok_const, tok_protected, tok_public, tok_virtual, - // cpecial characters + // user-defined + tok_label, + tok_number, + tok_char, + tok_string, + tok_comment, + // special characters tok_lbracket, // ( tok_lbracket_fi, // { tok_lbracket_sq, // [ tok_rbracket, // ) tok_rbracket_fi, // } tok_rbracket_sq, // ] - tok_quot, // ' - tok_quot_d, // " + //tok_quot, // ' + //tok_quot_d, // " tok_less, // < tok_more, // > tok_plus, // + tok_minus, // - - tok_multip, // * - tok_divide, // / + tok_asterisk, // * + tok_slash, // / tok_assign, // = - tok_colon, // : - tok_semicolon, // ; - tok_point, // . - tok_comma, // , - tok_question, // ? - tok_question_d, // ?? + tok_not, // ! + tok_equals, // == + tok_not_equals, // != tok_and, // & tok_and_d, // && tok_or, // | tok_or_d, // || - tok_not, // ! - tok_equals, // == - tok_not_equals, // != + tok_question, // ? + tok_question_d, // ?? + tok_colon, // : + tok_semicolon, // ; + tok_point, // . + tok_comma, // , tok_tilda, // ~ tok_backslash, // \ tok_percent, // % tok_xor, // ^ tok_lattice, // # tok_dollar, // $ - tok_ + tok_at // @ } __attribute__((__packed__)) TokenId; typedef struct Token{ + char* value; TokenId id; - Unitype value; } Token;