From 0cf8a4e00e16f111d4d4372aae2b9c0c5ca47477 Mon Sep 17 00:00:00 2001
From: timerix
Date: Tue, 14 Mar 2023 01:13:14 +0600
Subject: [PATCH] source generation

---
 kerep               |   2 +-
 src/cb2c/cb2c.c     |  91 ++++++++++++++----
 src/cb2c/cb2c.h     |   5 +-
 src/cb2c/main.c     | 164 +++++++++++++++++++++++++-------
 src/lexer/analize.c | 184 ++++++++++++++++++++++++++++++++++++
 src/lexer/context.h |  10 +-
 src/lexer/init.c    |   2 +-
 src/lexer/lexer.c   | 226 ++++++--------------------------------------
 src/lexer/lexer.h   |  29 +++++-
 src/lexer/tokens.c  |  11 ---
 src/lexer/tokens.h  | 173 ++++++++++++++++-----------------
 11 files changed, 539 insertions(+), 358 deletions(-)
 create mode 100644 src/lexer/analize.c

diff --git a/kerep b/kerep
index a0458f6..2a9214f 160000
--- a/kerep
+++ b/kerep
@@ -1 +1 @@
-Subproject commit a0458f6d2ef04ce30f5c951281cd7de91bd48cff
+Subproject commit 2a9214fb795e97aa754ac46346310f94b7e61bfd
diff --git a/src/cb2c/cb2c.c b/src/cb2c/cb2c.c
index 78711fb..d2aed5d 100644
--- a/src/cb2c/cb2c.c
+++ b/src/cb2c/cb2c.c
@@ -1,25 +1,80 @@
 #include "cb2c.h"
 #include "../../kerep/src/String/StringBuilder.h"
 
-void namespaceToC(NamespaceContext* context, StringBuilder* b){
-
-}
+#define addc(C) StringBuilder_append_char(b, C);
+#define adds(S) StringBuilder_append_cptr(b, S);
 
-void functionToC(FunctionContext* context, StringBuilder* b){
-
-}
+void appendToken(StringBuilder* b, Token tok, u16* _tab_count){
+    u16 tab_count=*_tab_count;
+    adds(tok.value)
 
-void classToC(ClassContext* context, StringBuilder* b){
-
-}
-
-char* contextToC(void* context){
-    StringBuilder* b=StringBuilder_create();
-    switch(((Context*)context)->type){
-        case CT_Namespace: namespaceToC(context, b); break;
-        case CT_Function: functionToC(context, b); break;
-        case CT_Class: classToC(context, b); break;
-        default: throw(ERR_WRONGTYPE);
+    switch(tok.id){
+        case tok_lbracket_fi:
+            tab_count++;
+            goto add_new_line;
+        case tok_rbracket_fi:
+            tab_count--;
+            goto add_new_line;
+        case tok_semicolon:
+        case tok_colon:
+        case tok_comment: ;
+        add_new_line:
+            addc('\n');
+            for(u16 i=0; i<tab_count; i++)
+                addc('\t');
+            break;
+        default: break;
+    }
+    *_tab_count=tab_count;
+}
+
+Maybe appendNamespaceContext(StringBuilder* b, NamespaceContext* context){
+    adds("/* generated from ")
+    if(context->base.namespace!=NULL){
+        adds(context->base.namespace)
+        addc('.')
+    }
+    adds(context->base.name)
+    adds(" */\n\n")
+
+    u16 tab_count=0;
+    Autoarr_foreach(context->base.tokens, tok, appendToken(b, tok, &tab_count));
+    addc('\n');
+
+    return MaybeNull;
+}
+
+Maybe appendClassContext(StringBuilder* b, ClassContext* context){
+    safethrow(ERR_NOTIMPLEMENTED,;);
+}
+
+Maybe appendFunctionContext(StringBuilder* b, FunctionContext* context){
+    safethrow(ERR_NOTIMPLEMENTED,;);
+}
+
+Maybe translateToC(Context* context){
+    StringBuilder* b=StringBuilder_create();
+    switch(context->type){
+        case ContextType_Namespace:
+            try(appendNamespaceContext(b, (NamespaceContext*)context), _0, StringBuilder_free(b));
+            break;
+        case ContextType_Class:
+            try(appendClassContext(b, (ClassContext*)context), _2, StringBuilder_free(b));
+            break;
+        case ContextType_Function:
+            try(appendFunctionContext(b, (FunctionContext*)context), _1, StringBuilder_free(b));
+            break;
+        default: safethrow(ERR_WRONGTYPE, StringBuilder_free(b));
+    }
+    return SUCCESS(UniHeapPtr(char, StringBuilder_build(b).ptr));
 }
diff --git a/src/cb2c/cb2c.h b/src/cb2c/cb2c.h
index 00d84fe..7c2298f 100644
--- a/src/cb2c/cb2c.h
+++ b/src/cb2c/cb2c.h
@@ -2,4 +2,7 @@
 
 #include "../lexer/lexer.h"
 
-char* contextToC(void* context);
+/// @brief generates C source code from tokens
+/// @param context contains code tokens
+/// @return Maybe C source code
+Maybe translateToC(Context* context);
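The appendToken switch above is in effect a small pretty-printing rule: '{' pushes one indentation level, '}' pops one, and ';', ':' or a comment ends the current line, after which the next line is prefixed with one tab per level. The following standalone sketch shows the same idea in plain C, keyed off each token's last character instead of a TokenId; emit_token and the sample token list are hypothetical and are not part of cb2c or kerep.

#include <stdio.h>
#include <string.h>

/* Print one token, then track indentation roughly the way appendToken does:
   '{' raises the depth, '}' lowers it, ';' or ':' ends the line, and every
   new line is prefixed with one tab per depth level. */
static void emit_token(const char* tok, unsigned* depth){
    fputs(tok, stdout);
    size_t n=strlen(tok);
    char last= n ? tok[n-1] : '\0';
    if(last=='{') (*depth)++;
    else if(last=='}'){ if(*depth) (*depth)--; }
    else if(last!=';' && last!=':') return;   /* other tokens stay on the same line */
    putchar('\n');
    for(unsigned i=0; i<*depth; i++) putchar('\t');
}

int main(void){
    const char* toks[]={"int main(){", "int x=0;", "x++;", "return x;", "}"};
    unsigned depth=0;
    for(size_t i=0; i<sizeof(toks)/sizeof(*toks); i++)
        emit_token(toks[i], &depth);
    putchar('\n');
    return 0;
}

Folding such a function over a token array is what appendNamespaceContext does with Autoarr_foreach over context->base.tokens.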
diff --git a/src/cb2c/main.c b/src/cb2c/main.c
index 881f06f..4e17ee8 100644
--- a/src/cb2c/main.c
+++ b/src/cb2c/main.c
@@ -3,55 +3,151 @@
 #include "cb2c.h"
 #include "../lexer/lexer.h"
 
-Autoarr(Token)* parseFile(char* filename){
-    tryLast(file_open(filename, FileOpenMode_Read), m_srcfile)
-    File* srcfile=m_srcfile.value.VoidPtr;
-    char* src;
-    tryLast(file_readAll(srcfile, &src), m_srcLen)
-    u64 srcLen=m_srcLen.value.UInt64;
-    kprintf("srclen: %lu\n", srcLen);
-    src[srcLen]='\0';
-    tryLast(lexan(src, filename), m_tokens)
-    Autoarr(Token)* tokens=m_tokens.value.VoidPtr;
-    return tokens;
-}
+char cb2c_version[]="0.0.1";
 
-int main(const int argc, const char* const* argv){
-    if(!setlocale(LC_ALL, "C.UTF8"))
-        kprintf("\e[93msetlocale failed\n");
+i32 main(const int argc, const char* const* argv){
+    if(setlocale(LC_CTYPE, "C.UTF-8")==NULL)
+        kprintf("\e[93msetlocale failed\e[37m\n");
 
-    bool compile=0, translate=0;
-    char *compiler=NULL, *tranlation_out_dir=NULL;
+    Autoarr(Pointer)* source_files=Autoarr_create(Pointer, 64, 64);
+    bool compile=false;
+    char* compiler=NULL;
+    char* compilation_out_dir=NULL;
+    bool translate=false;
+    char* translation_target=NULL; // single .c + single .h; namespace-structured project
+    char* translation_out_dir=NULL;
+
+    // parse command-line arguments
+    if(argc==1){
+        kprintf("\e[91mno arguments\e[37m\n");
+        return -1;
+    }
     for(int argi=1; argi<argc; argi++){
         char* a=argv[argi];
 
         if(cptr_compare(a, "--compile")){
             compile=true;
             kprintf(" compile=true\n");
         }
 
         else if(cptr_compare(a, "--compiler")){
             if(argi+1>=argc || argv[argi+1][0]=='-')
                 throw("after argument --compiler must be compiler name");
             compiler=argv[++argi];
             kprintf(" compiler=%s\n", compiler);
         }
 
-        else if(cptr_compare(a, "-t") || cptr_compare(a, "--tranlation-out-dir")){
-            translate=1;
+
+        else if(cptr_compare(a, "-t") || cptr_compare(a, "--translate")){
+            translate=true;
+            kprintf(" translate=true\n");
+
             if(argi+1>=argc || argv[argi+1][0]=='-')
                 throw("after argument --translation-out-dir must be directory path");
-            tranlation_out_dir=argv[argi+1];
-            if(!dir_exists(tranlation_out_dir))
-                dir_create(tranlation_out_dir);
+
+            translation_target=argv[++argi];
+            kprintf(" translation_target=%s\n",translation_target);
+        }
+
+        else if(cptr_compare(a, "-to") || cptr_compare(a, "--translation-out-dir")){
+            if(argi+1>=argc || argv[argi+1][0]=='-')
+                throw("after argument --translation-out-dir must be directory path");
+
+            translation_out_dir=argv[++argi];
+            kprintf(" translation_out_dir=%s\n", translation_out_dir);
+
+            if(!dir_exists(translation_out_dir)){
+                dir_create(translation_out_dir);
+                kprintf(" directory was created\n");
+            }
+            else kprintf(" directory exists\n");
+        }
+
+        else if(cptr_compare(a, "-o") || cptr_compare(a, "--compilation-out-dir")){
+            if(argi+1>=argc || argv[argi+1][0]=='-')
+                throw("after argument --compilation-out-dir must be directory path");
+
+            compilation_out_dir=argv[++argi];
+            kprintf(" compilation_out_dir=%s\n", compilation_out_dir);
+
+            if(!dir_exists(compilation_out_dir)){
+                dir_create(compilation_out_dir);
+                kprintf(" directory was created\n");
+            }
+            else kprintf(" directory exists\n");
+        }
+
+        else {
+            if(a[0]=='-'){
+                kprintf(" \e[91minvalid argument: %s\e[37m\n", a);
+                return -2;
+            }
+            if(file_exists(a)){
+                Autoarr_add(source_files, a);
+                kprintf(" found source file: %s\n", a);
+            }
+            else {
+                kprintf(" \e[91mfile not found: %s\e[37m\n", a);
+                return -3;
+            }
+        }
     }
-    kprintf("\n");
-    return 0;
+
+    if(!compile && !translate){
+        kprintf("\e[91moperation not specified: select --translate and/or --compile\e[37m\n");
+        return -4;
+    }
+    if(Autoarr_length(source_files)==0){
+        kprintf("\e[91mno source files specified\e[37m\n");
+        return -5;
+    }
 
     // kerep type system
     kt_beginInit();
     kt_initKerepTypes();
-    kt_initCbLexerTypes();
+    kt_initLexerTypes();
     kt_endInit();
 
-    // keywords search tree
-    init_keywordsSearchTree();
-    kprint(kp_s|kp_fgGray, "keywordsSearchTree: ", kp_h|kp_pre, keywordsSearchTree, kp_c, '\n');
-
-    Autoarr(Token)* tokens=parseFile("src.cb");
-    kprintf("tokens: %u\n", Autoarr_length(tokens));
-    Autoarr_foreach(tokens, tok, kprintf("%u %s\n",tok.id, tok.value));
-    STNode_free(keywordsSearchTree);
+    kprint_setColor(kp_fgGray);
+
+    Lexer* lexer=Lexer_create();
+
+    Autoarr_foreach(source_files, src_file_name, ({
+        tryLast(Lexer_parseFile(lexer, src_file_name), m_tokens)
+        Autoarr(Token)* tokens=m_tokens.value.VoidPtr;
+        kprintf("tokens: %u\n", Autoarr_length(tokens));
+        Autoarr_foreach(tokens, tok, kprintf("%u %s\n",tok.id, tok.value));
+
+        NamespaceContext file_context={
+            .base={
+                .name=path_basename(src_file_name, false),
+                .namespace=NULL,
+                .parent=NULL,
+                .type=ContextType_Namespace,
+                .tokens=tokens
+            }
+        };
+        tryLast(translateToC(&file_context.base), m_c_code)
+        char* generated_code=m_c_code.value.VoidPtr;
+        kprintf("%s", generated_code);
+
+        char* generated_file_name;
+        if(file_context.base.namespace!=NULL)
+            generated_file_name=cptr_concat(file_context.base.namespace,".",file_context.base.name,".g.c");
+        else generated_file_name=cptr_concat(file_context.base.name,".g.c");
+
+        tryLast(file_open(generated_file_name, FileOpenMode_Write), m_generated_file)
+        File* generated_file=m_generated_file.value.VoidPtr;
+        kprintf("created file %s\n", generated_file_name);
+        tryLast(file_writeCptr(generated_file, generated_code),_m_1885);
+        tryLast(file_close(generated_file),_m_14415);
+        kprintf("source code has been written to the file\n");
+
+        free(generated_code);
+        free(generated_file_name);
+    }));
+
+    Autoarr_free(source_files, true);
+    Lexer_destroy(lexer);
     return 0;
-}
\ No newline at end of file
+}
diff --git a/src/lexer/analize.c b/src/lexer/analize.c
new file mode 100644
index 0000000..098d11e
--- /dev/null
+++ b/src/lexer/analize.c
@@ -0,0 +1,184 @@
+#include "lexer.h"
+
+Maybe _throw_wrongchar(Lexer* lex){
+    char* _errline=string_extract(lex->line);
+    char* _context=string_extract(lex->context);
+    printf("\n\e[91mwrong char <%c> at [%s:%u:%u %s]\n >>> %s\n",
+        lex->source[lex->charnum], lex->filename,lex->linenum,lex->charnum,_context, _errline);
+    exit(96);
+}
+#define throw_wrongchar(freeMem) { freeMem; return _throw_wrongchar(lex); }
+
+// adds <lex->label> to <lex->tokens> as tok_label or tok_number
+void _tryAddLabel(Lexer* lex){
+    if(lex->label.length==0) return;
+
+    Unitype uni=ST_pullString(lex->keywordSearchTree, lex->label);
+    if(uni.VoidPtr!=NULL) // built-in keyword
+        Autoarr_add(lex->tokens, *(Token*)uni.VoidPtr);
+    else { // user-defined label
+        Token ut;
+        ut.value=string_extract(lex->label);
+        ut.on_heap=true;
+        switch(*lex->label.ptr){
+            case '0': case '1': case '2': case '3': case '4':
+            case '5': case '6': case '7': case '8': case '9':
+                ut.id=tok_number;
+                break;
+            default:
+                ut.id=tok_label;
+                break;
+        }
+        Autoarr_add(lex->tokens, ut);
+    }
+
+    lex->label=(string){lex->source, 0};
+};
+#define tryAddLabel() _tryAddLabel(lex)
+
+#define addDefTok(id) { tryAddLabel(); Autoarr_add(lex->tokens, default_tokens[id]); }
+
+void _addDefTok_ifnext(Lexer* lex, char next, TokenId yes, TokenId no){
+    if(*(lex->source+1)==next){
+        lex->source++;
+        addDefTok(yes);
+    }
+    else addDefTok(no);
+}
+#define addDefTok_ifnext(nextChar, yesTok, noTok) _addDefTok_ifnext(lex, nextChar, yesTok, noTok)
+
+// returns text in quotes or error
+Maybe _readString(Lexer* lex, char quotChar){
+    char c;
+    bool prevIsBackslash=false;
+    char* srcFirst=lex->source;
+    while ((c=*++lex->source)){
+        if(c==quotChar) {
+            if(prevIsBackslash) // \"
+                prevIsBackslash=false;
+            else { // "
+                string str={srcFirst, lex->source-srcFirst+1};
+                char* extracted=string_extract(str);
+                return SUCCESS(UniHeapPtr(char, extracted));
+            }
+        }
+        else prevIsBackslash= c=='\\' && !prevIsBackslash;
+    }
+    lex->source=srcFirst;
+    throw_wrongchar(;);
+}
+#define readString(quotChar) _readString(lex, quotChar)
+
+
+Maybe __Lexer_analize(Lexer* lex, char* _filename, char* _source){
+    lex->filename=_filename;
+    lex->source=_source;
+    lex->tokens=Autoarr_create(Token, 64, 1024);
+    lex->label=(string){_source, 0};
+    lex->line=(string){_source, 0};
+    lex->linenum=0;
+    lex->charnum=0;
+
+    char c;
+    lex->source--;
+    while ((c=*++(lex->source))) {
+        // tryAddLabel() resets lex->label.length, but lex->label.ptr still points to the previous token's beginning
+        if(lex->label.length==0)
+            lex->label.ptr=lex->source;
+
+        switch(c){
+            case ' ': case '\t':
+            case '\r': case '\n':
+                tryAddLabel();
+                break;
+
+            case '(': addDefTok(tok_lbracket); break;
+            case '{': addDefTok(tok_lbracket_fi); break;
+            case '[': addDefTok(tok_lbracket_sq); break;
+            case ')': addDefTok(tok_rbracket); break;
+            case '}': addDefTok(tok_rbracket_fi); break;
+            case ']': addDefTok(tok_rbracket_sq); break;
+
+            case '\'':
+                tryAddLabel();
+                try(readString('\''), maybeC, ;){
+                    Token ctok={
+                        .id=tok_character,
+                        .value=(char*)maybeC.value.VoidPtr
+                    };
+                    Autoarr_add(lex->tokens, ctok);
+                }
+                break;
+            case '"':
+                tryAddLabel();
+                try(readString('"'), maybeS, ;){
+                    Token stok={
+                        .id=tok_string,
+                        .value=(char*)maybeS.value.VoidPtr
+                    };
+                    Autoarr_add(lex->tokens, stok);
+                }
+                break;
+
+            case '<': addDefTok(tok_less); break;
+            case '>': addDefTok(tok_more); break;
+            case '+': addDefTok(tok_plus); break;
+            case '-': addDefTok(tok_minus); break;
+            case '*': addDefTok(tok_asterisk); break;
+
+            case '/':
+                tryAddLabel();
+                string commentStr={lex->source, 1};
+                c=*++lex->source;
+                if(c=='/') { // single-line comment
+                    commentStr.length++;
+                    while((c=*++lex->source)){
+                        if(c=='\n' || c=='\r') break;
+                        else commentStr.length++;
+                    }
+                }
+                else if(c=='*') { // multi-line comment
+                    commentStr.length++; // first slash
+                    while((c=*++lex->source)){
+                        commentStr.length++;
+                        if(c=='*' && *(++lex->source)=='/') break;
+                    }
+                    commentStr.length++; // last slash
+                }
+                else { // not a comment
+                    lex->source--;
+                    addDefTok(tok_slash);
+                    break;
+                }
+                Token comTok={
+                    .value=string_extract(commentStr),
+                    .id=tok_comment
+                };
+                Autoarr_add(lex->tokens, comTok);
+                break;
+
+            case '=': addDefTok_ifnext('=', tok_equal, tok_assign); break;
+            case '!': addDefTok_ifnext('=', tok_not_equal, tok_not); break;
+            case '&': addDefTok_ifnext('&', tok_and_d, tok_and); break;
+            case '|': addDefTok_ifnext('|', tok_or_d, tok_or); break;
+            case '?': addDefTok_ifnext('?', tok_question_d, tok_question);break;
+            case ':': addDefTok(tok_colon); break;
+            case ';': addDefTok(tok_semicolon); break;
+            case '.': addDefTok(tok_point); break;
+            case ',': addDefTok(tok_comma); break;
+            case '~': addDefTok(tok_tilda); break;
+            case '\\':addDefTok(tok_backslash); break;
+            case '%': addDefTok(tok_percent); break;
+            case '^': addDefTok(tok_xor); break;
+            case '$': addDefTok(tok_dollar); break;
+            case '@': addDefTok(tok_at); break;
+
+            default:
+                lex->label.length++;
+                break;
+        }
+    }
+
+    tryAddLabel();
+    return SUCCESS(UniHeapPtr(Autoarr(Token), lex->tokens));
+}
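The _readString helper above walks forward from the opening quote and stops only at a quote character that is not escaped, which is what the prevIsBackslash toggle tracks: a backslash escapes exactly one following character, and two backslashes cancel out. A minimal standalone sketch of that scan, assuming a NUL-terminated buffer; quoted_length is a hypothetical name and not a function from this codebase:

#include <stdio.h>
#include <stdbool.h>
#include <stddef.h>

/* s[0] is the opening quote. Returns the number of characters up to and
   including the closing quote, or 0 if the literal is never closed.
   Backslash escapes are tracked the same way _readString tracks prevIsBackslash. */
static size_t quoted_length(const char* s, char quote){
    bool prev_is_backslash=false;
    for(size_t i=1; s[i]; i++){
        if(s[i]==quote && !prev_is_backslash)
            return i+1;                              /* include the closing quote */
        prev_is_backslash = (s[i]=='\\') && !prev_is_backslash;
    }
    return 0;                                        /* unterminated literal */
}

int main(void){
    const char* src="\"a \\\"quoted\\\" word\" rest";
    size_t n=quoted_length(src, '"');
    printf("literal spans %zu chars: %.*s\n", n, (int)n, src);
    return 0;
}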
diff --git a/src/lexer/context.h b/src/lexer/context.h
index 25239e8..4c8c0f1 100644
--- a/src/lexer/context.h
+++ b/src/lexer/context.h
@@ -5,15 +5,15 @@
 
 PACKED_ENUM(ContextType,
-    CT_Namespace,
-    CT_Class,
-    CT_Function
+    ContextType_Namespace,
+    ContextType_Class,
+    ContextType_Function
 )
 
-typedef struct Context Context;
 STRUCT(Context,
     char* name;
-    Context* parent;
+    char* namespace; /* nullable */
+    Context* parent; /* nullable */
     Autoarr(Token)* tokens;
     ContextType type;
 )
diff --git a/src/lexer/init.c b/src/lexer/init.c
index 7169cd1..8f8c540 100644
--- a/src/lexer/init.c
+++ b/src/lexer/init.c
@@ -1,6 +1,6 @@
 #include "lexer.h"
 
-void kt_initCbLexerTypes(){
+void kt_initLexerTypes(){
     kt_register(Token);
     kt_register(Autoarr_Token);
 }
diff --git a/src/lexer/lexer.c b/src/lexer/lexer.c
index fdb2669..1bb803f 100644
--- a/src/lexer/lexer.c
+++ b/src/lexer/lexer.c
@@ -1,207 +1,43 @@
 #include "lexer.h"
 #include "../../kerep/src/String/string.h"
+#include "../../kerep/src/Filesystem/filesystem.h"
 
-STRUCT(SharedLexerData,
-    char* _source;
-    char* _filename;
-    Autoarr(Token)* _tokens;
-    string _context;
-    string _line;
-    string _label;
-    u32 _linenum;
-    u32 _charnum;
-)
+Maybe __Lexer_analize(Lexer* lex, char* _filename, char* _source);
 
-#define source sld->_source
-#define filename sld->_filename
-#define tokens sld->_tokens
-#define context sld->_context
-#define line sld->_line
-#define label sld->_label
-#define linenum sld->_linenum
-#define charnum sld->_charnum
-
-Maybe _throw_wrongchar(SharedLexerData* sld){
-    char* errline=string_extract(line);
-    char* _context=string_extract(context);
-    printf("\n\e[91mwrong char <%c> at [%s:%u:%u %s]\n >>> %s\n",
-        source[charnum], filename,linenum,charnum,_context, errline);
-    exit(96);
+Lexer* Lexer_create(){
+    Lexer* lex=malloc(sizeof(*lex));
+    *lex=(Lexer){0};
+    lex->analize=__Lexer_analize;
+    lex->keywordSearchTree=STNode_create();
+    for(TokenId keywordId=0; keywordId<=tok_typeof; keywordId++){
+        const Token* keywordptr=&default_tokens[keywordId];
+        Unitype uni=UniStackPtr(Token, keywordptr);
+        ST_push(lex->keywordSearchTree, keywordptr->value, uni);
+    }
+    return lex;
 }
-#define throw_wrongchar(freeMem) { freeMem; return _throw_wrongchar(sld); }
-// adds