source generation

This commit is contained in:
timerix 2023-03-14 01:13:14 +06:00
parent 832b0ced23
commit 0cf8a4e00e
11 changed files with 539 additions and 358 deletions

2
kerep

@ -1 +1 @@
Subproject commit a0458f6d2ef04ce30f5c951281cd7de91bd48cff Subproject commit 2a9214fb795e97aa754ac46346310f94b7e61bfd

View File

@ -1,25 +1,80 @@
#include "cb2c.h" #include "cb2c.h"
#include "../../kerep/src/String/StringBuilder.h" #include "../../kerep/src/String/StringBuilder.h"
void namespaceToC(NamespaceContext* context, StringBuilder* b){ #define addc(C) StringBuilder_append_char(b, C);
#define adds(S) StringBuilder_append_cptr(b, S);
} void appendToken(StringBuilder* b, Token tok, u16* _tab_count){
u16 tab_count=*_tab_count;
adds(tok.value)
void functionToC(FunctionContext* context, StringBuilder* b){ switch(tok.id){
case tok_lbracket_fi:
} tab_count++;
goto add_new_line;
void classToC(ClassContext* context, StringBuilder* b){ case tok_rbracket_fi:
tab_count--;
} goto add_new_line;
case tok_semicolon:
char* contextToC(void* context){ case tok_colon:
StringBuilder* b=StringBuilder_create(); case tok_comment: ;
switch(((Context*)context)->type){ add_new_line:
case CT_Namespace: namespaceToC(context, b); break; addc('\n');
case CT_Function: functionToC(context, b); break; for(u16 i=0; i<tab_count; i++)
case CT_Class: classToC(context, b); break; addc('\t');
default: throw(ERR_WRONGTYPE); break;
case tok_plus: case tok_minus: case tok_asterisk: case tok_slash:
case tok_assign: case tok_equal: case tok_not: case tok_not_equal:
case tok_and: case tok_and_d: case tok_or: case tok_or_d:
case tok_less: case tok_more: case tok_percent:
case tok_for: case tok_if: case tok_while: case tok_switch:
case tok_lbracket: case tok_rbracket: case tok_lbracket_sq: case tok_rbracket_sq:
case tok_break: case tok_backslash:
break;
default:
addc(' ');
} }
return StringBuilder_build(b).ptr;
*_tab_count=tab_count;
}
Maybe appendNamespaceContext(StringBuilder* b, NamespaceContext* context){
adds("/* context ")
if(context->base.namespace!=NULL){
adds(context->base.namespace)
addc('.')
}
adds(context->base.name)
adds(" */\n\n")
u16 tab_count=0;
Autoarr_foreach(context->base.tokens, tok, appendToken(b, tok, &tab_count));
addc('\n');
return MaybeNull;
}
Maybe appendClassContext(StringBuilder* b, ClassContext* context){
safethrow(ERR_NOTIMPLEMENTED,;);
}
Maybe appendFunctionContext(StringBuilder* b, FunctionContext* context){
safethrow(ERR_NOTIMPLEMENTED,;);
}
Maybe translateToC(Context* context){
StringBuilder* b=StringBuilder_create();
switch(context->type){
case ContextType_Namespace:
try(appendNamespaceContext(b, (NamespaceContext*)context), _0, StringBuilder_free(b));
break;
case ContextType_Class:
try(appendClassContext(b, (ClassContext*)context), _2, StringBuilder_free(b));
break;
case ContextType_Function:
try(appendFunctionContext(b, (FunctionContext*)context), _1, StringBuilder_free(b));
break;
default: safethrow(ERR_WRONGTYPE, StringBuilder_free(b));
}
return SUCCESS(UniHeapPtr(char, StringBuilder_build(b).ptr));
} }

View File

@ -2,4 +2,7 @@
#include "../lexer/lexer.h" #include "../lexer/lexer.h"
char* contextToC(void* context); /// @brief generates C source code from tokens
/// @param context contains code tokens
/// @return Maybe<char*> C source code
Maybe translateToC(Context* context);

View File

@ -3,55 +3,151 @@
#include "cb2c.h" #include "cb2c.h"
#include "../lexer/lexer.h" #include "../lexer/lexer.h"
Autoarr(Token)* parseFile(char* filename){ char cb2c_version[]="0.0.1";
tryLast(file_open(filename, FileOpenMode_Read), m_srcfile)
File* srcfile=m_srcfile.value.VoidPtr;
char* src;
tryLast(file_readAll(srcfile, &src), m_srcLen)
u64 srcLen=m_srcLen.value.UInt64;
kprintf("srclen: %lu\n", srcLen);
src[srcLen]='\0';
tryLast(lexan(src, filename), m_tokens)
Autoarr(Token)* tokens=m_tokens.value.VoidPtr;
return tokens;
}
int main(const int argc, const char* const* argv){ i32 main(const int argc, const char* const* argv){
if(!setlocale(LC_ALL, "C.UTF8")) if(setlocale(LC_CTYPE, "C.UTF-8")!=0)
kprintf("\e[93msetlocale failed\n"); kprintf("\e[93msetlocale failed\e[37m\n");
bool compile=0, translate=0; Autoarr(Pointer)* source_files=Autoarr_create(Pointer, 64, 64);
char *compiler=NULL, *tranlation_out_dir=NULL; bool compile=false;
char* compiler=NULL;
char* compilation_out_dir=NULL;
bool translate=false;
char* translation_traget=NULL; // single .c + single .h; namespace structured project
char* translation_out_dir=NULL;
// command arguments
if(argc==1){
kprintf("\e[91mno arguments\e[37m\n");
return -1;
}
for(int argi=1; argi<argc; argi++){ for(int argi=1; argi<argc; argi++){
char* a=argv[argi]; char* a=argv[argi];
kprintf("%s ", a); kprintf("%s\n", a);
if(cptr_compare(a, "-v") || cptr_compare(a, "--version")){
kprintf(" cb2c v%s\n", cb2c_version);
return 0;
}
if(cptr_compare(a, "-c") || cptr_compare(a, "--compiler")){ if(cptr_compare(a, "-c") || cptr_compare(a, "--compiler")){
compile=1; compile=true;
kprintf(" compile=true\n");
if(argi+1>=argc || argv[argi+1][0]=='-') if(argi+1>=argc || argv[argi+1][0]=='-')
throw("after argument --compiler must be compiler name"); throw("after argument --compiler must be compiler name");
compiler=argv[++argi]; compiler=argv[++argi];
kprintf(" compiler=%s\n", compiler);
} }
else if(cptr_compare(a, "-t") || cptr_compare(a, "--tranlation-out-dir")){
translate=1; else if(cptr_compare(a, "-t") || cptr_compare(a, "--translate")){
translate=true;
kprintf(" translate=true\n");
if(argi+1>=argc || argv[argi+1][0]=='-') if(argi+1>=argc || argv[argi+1][0]=='-')
throw("after argument --translation-out-dir must be directory path"); throw("after argument --translation-out-dir must be directory path");
tranlation_out_dir=argv[argi+1];
if(!dir_exists(tranlation_out_dir)) translation_traget=argv[++argi];
dir_create(tranlation_out_dir); kprintf(" translation_traget=%s\n",translation_traget);
}
else if(cptr_compare(a, "-to") || cptr_compare(a, "--tranlation-out-dir")){
if(argi+1>=argc || argv[argi+1][0]=='-')
throw("after argument --translation-out-dir must be directory path");
translation_out_dir=argv[++argi];
kprintf(" translation_out_dir=%s\n", translation_out_dir);
if(!dir_exists(translation_out_dir)){
dir_create(translation_out_dir);
kprintf(" directory was created\n");
}
else kprintf(" directory exists\n");
}
else if(cptr_compare(a, "-o") || cptr_compare(a, "--compilation-out-dir")){
if(argi+1>=argc || argv[argi+1][0]=='-')
throw("after argument --compilation-out-dir must be directory path");
compilation_out_dir=argv[++argi];
kprintf(" compilation_out_dir=%s\n", compilation_out_dir);
if(!dir_exists(compilation_out_dir)){
dir_create(compilation_out_dir);
kprintf(" directory was created\n");
}
else kprintf(" directory exists\n");
}
else {
if(a[0]=='-'){
kprintf(" \e[91minvalid argument: %s\e[37m\n", a);
return -2;
}
if(file_exists(a)){
Autoarr_add(source_files, a);
kprintf(" found source file: %s\n", a);
}
else {
kprintf(" \e[91mfile not found: %s\e[37m\n", a);
return -3;
} }
} }
kprintf("\n"); return 0; }
if(!compile && !translate){
kprintf("\e[91moperation not specified: select --translate and/or --compile\e[37m\n");
return -4;
}
if(Autoarr_length(source_files)==0){
kprintf("\e[91mno source files specified\e[37m\n");
return -5;
}
// kerep type system // kerep type system
kt_beginInit(); kt_beginInit();
kt_initKerepTypes(); kt_initKerepTypes();
kt_initCbLexerTypes(); kt_initLexerTypes();
kt_endInit(); kt_endInit();
// keywords search tree kprint_setColor(kp_fgGray);
init_keywordsSearchTree();
kprint(kp_s|kp_fgGray, "keywordsSearchTree: ", kp_h|kp_pre, keywordsSearchTree, kp_c, '\n'); Lexer* lexer=Lexer_create();
Autoarr(Token)* tokens=parseFile("src.cb");
Autoarr_foreach(source_files, src_file_name, ({
tryLast(Lexer_parseFile(lexer, src_file_name), m_tokens)
Autoarr(Token)* tokens=m_tokens.value.VoidPtr;
kprintf("tokens: %u\n", Autoarr_length(tokens)); kprintf("tokens: %u\n", Autoarr_length(tokens));
Autoarr_foreach(tokens, tok, kprintf("%u %s\n",tok.id, tok.value)); Autoarr_foreach(tokens, tok, kprintf("%u %s\n",tok.id, tok.value));
STNode_free(keywordsSearchTree); NamespaceContext file_context={
.base={
.name=path_basename(src_file_name, false),
.namespace=NULL,
.parent=NULL,
.type=ContextType_Namespace,
.tokens=tokens
}
};
tryLast(translateToC(&file_context.base), m_c_code)
char* generated_code=m_c_code.value.VoidPtr;
kprintf("%s", generated_code);
char* generated_file_name;
if(file_context.base.namespace!=NULL)
generated_file_name=cptr_concat(file_context.base.namespace,".",file_context.base.name,".g.c");
else generated_file_name=cptr_concat(file_context.base.name,".g.c");
tryLast(file_open(generated_file_name, FileOpenMode_Write), m_generated_file)
File* generated_file=m_generated_file.value.VoidPtr;
kprintf("created file %s\n", generated_file_name);
tryLast(file_writeCptr(generated_file, generated_code),_m_1885);
tryLast(file_close(generated_file),_m_14415);
kprintf("source code has been written to the file\n");
free(generated_code);
free(generated_file_name);
}));
Autoarr_free(source_files, true);
Lexer_destroy(lexer);
return 0; return 0;
} }

184
src/lexer/analize.c Normal file
View File

@ -0,0 +1,184 @@
#include "lexer.h"
// Prints a colored diagnostic for an unexpected character and terminates
// the process with exit code 96. Declared Maybe only so call sites can write
// `return _throw_wrongchar(lex);` — it never actually returns.
// NOTE(review): lex->charnum is initialized to 0 and never advanced by
// __Lexer_analize, so both the reported column and the indexed character
// lex->source[lex->charnum] are always position 0 — confirm intent.
Maybe _throw_wrongchar(Lexer* lex){
    char* _errline=string_extract(lex->line);    // heap copy of the current line
    char* _context=string_extract(lex->context); // heap copy of the context name
    printf("\n\e[91mwrong char <%c> at [%s:%u:%u %s]\n >>> %s\n",
        lex->source[lex->charnum], lex->filename,lex->linenum,lex->charnum,_context, _errline);
    exit(96);
}
// Runs the <freeMem> cleanup statement(s), then reports the bad char and exits.
// Expects a variable named `lex` in the caller's scope.
#define throw_wrongchar(freeMem) { freeMem; return _throw_wrongchar(lex); }
// Flushes the accumulated lex->label (if non-empty) into lex->tokens.
// A label found in the keyword search tree is emitted as the prebuilt
// keyword token; anything else gets a heap-copied value and is classified
// as tok_number or tok_label.
void _tryAddLabel(Lexer* lex){
    if(lex->label.length==0) return; // nothing accumulated since last flush
    Unitype uni=ST_pullString(lex->keywordSearchTree, lex->label);
    if(uni.VoidPtr!=NULL) // built-in keyword: tree stores a Token* into default_tokens
        Autoarr_add(lex->tokens, *(Token*)uni.VoidPtr);
    else { // user-defined identifier or numeric literal
        Token ut;
        ut.value=string_extract(lex->label); // heap copy of the label text
        ut.on_heap=true;                     // marks ut.value as owned/heap-allocated
        // classified by FIRST character only — e.g. "9abc" also becomes tok_number
        switch(*lex->label.ptr){
            case '0': case '1': case '2': case '3': case '4':
            case '5': case '6': case '7': case '8': case '9':
                ut.id=tok_number;
                break;
            default:
                ut.id=tok_label;
                break;
        }
        Autoarr_add(lex->tokens, ut);
    }
    lex->label=(string){lex->source, 0}; // reset accumulator; caller re-anchors ptr
};
#define tryAddLabel() _tryAddLabel(lex)
// Flushes the pending label, then appends the prebuilt token default_tokens[id].
// Expects a variable named `lex` in the caller's scope.
#define addDefTok(id) { tryAddLabel(); Autoarr_add(lex->tokens, default_tokens[id]); }
// Emits a one- or two-character operator token: when the character following
// the current one equals <next>, that character is consumed and <yes> is
// emitted (e.g. "==" -> tok_equal); otherwise <no> is emitted (e.g. "=").
void _addDefTok_ifnext(Lexer* lex, char next, TokenId yes, TokenId no){
    bool is_digraph = lex->source[1]==next;
    if(is_digraph)
        lex->source++; // consume the second character of the two-char operator
    addDefTok(is_digraph ? yes : no);
}
#define addDefTok_ifnext(nextChar, yesTok, noTok) _addDefTok_ifnext(lex, nextChar, yesTok, noTok)
// Reads a quoted literal starting at lex->source, which must point at the
// opening <quotChar>. The returned heap string INCLUDES both the opening and
// the closing quote character (length = lex->source-srcFirst+1 spans them).
// Backslash escapes of the quote char are honored, and "\\" is handled so a
// backslash that is itself escaped does not escape a following quote.
// On an unterminated literal, lex->source is restored and the process exits
// via throw_wrongchar.
/// @return Maybe<char*>
Maybe _readString(Lexer* lex, char quotChar){
    char c;
    bool prevIsBackslash=false;
    char* srcFirst=lex->source; // remembers the opening quote position
    while ((c=*++lex->source)){
        if(c==quotChar) {
            if(prevIsBackslash) // escaped quote (\") — keep scanning
                prevIsBackslash=false;
            else { // unescaped quote — end of literal
                string str={srcFirst, lex->source-srcFirst+1};
                char* extracted=string_extract(str);
                return SUCCESS(UniHeapPtr(char, extracted));
            }
        }
        else prevIsBackslash= c=='\\' && !prevIsBackslash; // toggles off for "\\\\"
    }
    // hit NUL before the closing quote: rewind so the error shows the opening quote
    lex->source=srcFirst;
    throw_wrongchar(;);
}
#define readString(quotChar) _readString(lex, quotChar)
// Tokenizes the NUL-terminated <_source> into a freshly created
// Autoarr(Token) stored in lex->tokens. Whitespace terminates the pending
// label; operators/brackets are emitted via the default_tokens table; quoted
// literals and comments get heap-copied token values; everything else
// accumulates into lex->label and is flushed by tryAddLabel().
// FIX(review): the multi-line comment scanner previously advanced lex->source
// inside the `c=='*'` test even when the next char was not '/', skipping a
// character entirely — so comments ending in "**/" (e.g. "/*a**/") missed
// their terminator and the lexer consumed code past the comment. It also
// incremented commentStr.length after an unterminated comment, letting
// string_extract read past the buffer end. Both are fixed below; token
// output for well-formed input is unchanged.
/// @param _filename used only for error reporting
/// @param _source NUL-terminated source text; the lexer keeps pointers into it
/// @return Maybe<Autoarr(Token)*>
Maybe __Lexer_analize(Lexer* lex, char* _filename, char* _source){
    lex->filename=_filename;
    lex->source=_source;
    lex->tokens=Autoarr_create(Token, 64, 1024);
    lex->label=(string){_source, 0};
    lex->line=(string){_source, 0};
    lex->linenum=0;
    lex->charnum=0;
    char c;
    lex->source--; // compensates the pre-increment on the first iteration
    while ((c=*++(lex->source))) {
        // tryAddLabel() resets lex->label.length, but lex->label.ptr still
        // points at the previous token's beginning — re-anchor it here
        if(lex->label.length==0)
            lex->label.ptr=lex->source;
        switch(c){
            case ' ': case '\t':
            case '\r': case '\n':
                tryAddLabel(); // whitespace only terminates the pending label
                break;
            case '(': addDefTok(tok_lbracket); break;
            case '{': addDefTok(tok_lbracket_fi); break;
            case '[': addDefTok(tok_lbracket_sq); break;
            case ')': addDefTok(tok_rbracket); break;
            case '}': addDefTok(tok_rbracket_fi); break;
            case ']': addDefTok(tok_rbracket_sq); break;
            case '\'':
                tryAddLabel();
                try(readString('\''), maybeC, ;){
                    Token ctok={
                        .id=tok_character,
                        .value=(char*)maybeC.value.VoidPtr
                    };
                    Autoarr_add(lex->tokens, ctok);
                }
                break;
            case '"':
                tryAddLabel();
                try(readString('"'), maybeS, ;){
                    Token stok={
                        .id=tok_string,
                        .value=(char*)maybeS.value.VoidPtr
                    };
                    Autoarr_add(lex->tokens, stok);
                }
                break;
            case '<': addDefTok(tok_less); break;
            case '>': addDefTok(tok_more); break;
            case '+': addDefTok(tok_plus); break;
            case '-': addDefTok(tok_minus); break;
            case '*': addDefTok(tok_asterisk); break;
            case '/':
                tryAddLabel();
                string commentStr={lex->source, 1}; // starts at the first '/'
                c=*++lex->source;
                if(c=='/') { // single-line comment: runs to end of line
                    commentStr.length++;
                    while((c=*++lex->source)){
                        if(c=='\n' || c=='\r') break;
                        else commentStr.length++;
                    }
                }
                else if(c=='*') { // multi-line comment: runs to the first "*/"
                    commentStr.length++; // counts the opening '*'
                    while((c=*++lex->source)){
                        commentStr.length++;
                        // peek without consuming, so a '*' not followed by '/'
                        // does not swallow the next character (fixes "**/")
                        if(c=='*' && lex->source[1]=='/'){
                            lex->source++;       // consume the closing '/'
                            commentStr.length++; // and count it
                            break;
                        }
                    }
                }
                else { // not a comment: plain division/slash token
                    lex->source--;
                    addDefTok(tok_slash);
                    break;
                }
                Token comTok={
                    .value=string_extract(commentStr),
                    .id=tok_comment
                };
                Autoarr_add(lex->tokens, comTok);
                break;
            case '=': addDefTok_ifnext('=', tok_equal, tok_assign); break;
            case '!': addDefTok_ifnext('=', tok_not_equal, tok_not); break;
            case '&': addDefTok_ifnext('&', tok_and_d, tok_and); break;
            case '|': addDefTok_ifnext('|', tok_or_d, tok_or); break;
            case '?': addDefTok_ifnext('?', tok_question_d, tok_question);break;
            case ':': addDefTok(tok_colon); break;
            case ';': addDefTok(tok_semicolon); break;
            case '.': addDefTok(tok_point); break;
            case ',': addDefTok(tok_comma); break;
            case '~': addDefTok(tok_tilda); break;
            case '\\':addDefTok(tok_backslash); break;
            case '%': addDefTok(tok_percent); break;
            case '^': addDefTok(tok_xor); break;
            case '$': addDefTok(tok_dollar); break;
            case '@': addDefTok(tok_at); break;
            default:
                lex->label.length++; // part of an identifier/number: keep accumulating
                break;
        }
    }
    tryAddLabel(); // flush a label that runs to end-of-input
    return SUCCESS(UniHeapPtr(Autoarr(Token), lex->tokens));
}

View File

@ -5,15 +5,15 @@
PACKED_ENUM(ContextType, PACKED_ENUM(ContextType,
CT_Namespace, ContextType_Namespace,
CT_Class, ContextType_Class,
CT_Function ContextType_Function
) )
typedef struct Context Context;
STRUCT(Context, STRUCT(Context,
char* name; char* name;
Context* parent; char* namespace; /* nullable */
Context* parent; /* nullable */
Autoarr(Token)* tokens; Autoarr(Token)* tokens;
ContextType type; ContextType type;
) )

View File

@ -1,6 +1,6 @@
#include "lexer.h" #include "lexer.h"
void kt_initCbLexerTypes(){ void kt_initLexerTypes(){
kt_register(Token); kt_register(Token);
kt_register(Autoarr_Token); kt_register(Autoarr_Token);
} }

View File

@ -1,207 +1,43 @@
#include "lexer.h" #include "lexer.h"
#include "../../kerep/src/String/string.h" #include "../../kerep/src/String/string.h"
#include "../../kerep/src/Filesystem/filesystem.h"
STRUCT(SharedLexerData, Maybe __Lexer_analize(Lexer* lex, char* _filename, char* _source);
char* _source;
char* _filename;
Autoarr(Token)* _tokens;
string _context;
string _line;
string _label;
u32 _linenum;
u32 _charnum;
)
#define source sld->_source Lexer* Lexer_create(){
#define filename sld->_filename Lexer* lex=malloc(sizeof(*lex));
#define tokens sld->_tokens *lex=(Lexer){0};
#define context sld->_context lex->analize=__Lexer_analize;
#define line sld->_line lex->keywordSearchTree=STNode_create();
#define label sld->_label for(TokenId keywordId=0; keywordId<=tok_typeof; keywordId++){
#define linenum sld->_linenum const Token* keywordptr=&default_tokens[keywordId];
#define charnum sld->_charnum Unitype uni=UniStackPtr(Token, keywordptr);
ST_push(lex->keywordSearchTree, keywordptr->value, uni);
Maybe _throw_wrongchar(SharedLexerData* sld){ }
char* errline=string_extract(line); return lex;
char* _context=string_extract(context);
printf("\n\e[91mwrong char <%c> at [%s:%u:%u %s]\n >>> %s\n",
source[charnum], filename,linenum,charnum,_context, errline);
exit(96);
} }
#define throw_wrongchar(freeMem) { freeMem; return _throw_wrongchar(sld); }
// adds <label> to <tokens> as tok_label or tok_number void Lexer_freeMembers(void* _l){
void _tryAddLabel(SharedLexerData* sld){ Lexer* lex=(Lexer*)_l;
if(label.length==0) return; STNode_free(lex->keywordSearchTree);
Unitype uni=ST_pullString(keywordsSearchTree, label);
if(uni.VoidPtr!=NULL) // built-in keyword
Autoarr_add(tokens, *(Token*)uni.VoidPtr);
else { // user-defined label
Token ut;
ut.value=string_extract(label);
switch(*label.ptr){
case '0': case '1': case '2': case '3': case '4':
case '5': case '6': case '7': case '8': case '9':
ut.id=tok_number;
break;
default:
ut.id=tok_label;
break;
}
Autoarr_add(tokens, ut);
}
label=(string){source, 0};
};
#define tryAddLabel() _tryAddLabel(sld)
#define addDefTok(id) tryAddLabel(); Autoarr_add(tokens, default_tokens[id])
void _addDefTok_ifnext(char next, TokenId yes, TokenId no, SharedLexerData* sld){
if(*(source+1)==next){
source++;
addDefTok(yes);
}
else addDefTok(no);
} }
#define addDefTok_ifnext(nextChar, yesTok, noTok) _addDefTok_ifnext(nextChar, yesTok, noTok, sld)
// returns text in quotes or error kt_define(Lexer, Lexer_freeMembers, NULL)
Maybe _readString(char quotChar, SharedLexerData* sld){
char c; void Lexer_destroy(Lexer* lex){
bool prevIsBackslash=false; Lexer_freeMembers(lex);
char* srcFirst=source; free(lex);
while ((c=*++source)){
if(c==quotChar) {
if(prevIsBackslash) // \"
prevIsBackslash=false;
else { // "
string str={srcFirst, source-srcFirst+1};
char* extracted=string_extract(str);
return SUCCESS(UniHeapPtr(char, extracted));
}
}
else prevIsBackslash= c=='\\' && !prevIsBackslash;
}
source=srcFirst;
throw_wrongchar(;);
} }
#define readString(quotChar) _readString(quotChar, sld)
///@return Maybe<Autoarr(Token)*>
Maybe _lexan(SharedLexerData* sld){ Maybe Lexer_parseFile(Lexer* lex, char* src_file_name){
char c; try(file_open(src_file_name, FileOpenMode_Read), m_src_file,;)
source--; File* src_file=m_src_file.value.VoidPtr;
while ((c=*++source)) { char* src_text;
// tryAddLabel() resets label.length, but label.ptr still pointing to prev token beginning try(file_readAll(src_file, &src_text), m_src_len, file_close(src_file))
if(label.length==0) u64 src_len=m_src_len.value.UInt64;
label.ptr=source; kprintf("srclen: %lu\n", src_len);
try(Lexer_parseText(lex, src_file_name, src_text), m_tokens, file_close(src_file))
switch(c){ Autoarr(Token)* tokens=m_tokens.value.VoidPtr;
case ' ': case '\t':
case '\r': case '\n':
tryAddLabel();
break;
case '(': addDefTok(tok_lbracket); break;
case '{': addDefTok(tok_lbracket_fi); break;
case '[': addDefTok(tok_lbracket_sq); break;
case ')': addDefTok(tok_rbracket); break;
case '}': addDefTok(tok_rbracket_fi); break;
case ']': addDefTok(tok_rbracket_sq); break;
case '\'':
tryAddLabel();
try(readString('\''), maybeC, ;){
Token ctok={
.id=tok_character,
.value=(char*)maybeC.value.VoidPtr
};
Autoarr_add(tokens, ctok);
}
break;
case '"':
tryAddLabel();
try(readString('"'), maybeS, ;){
Token stok={
.id=tok_string,
.value=(char*)maybeS.value.VoidPtr
};
Autoarr_add(tokens, stok);
}
break;
case '<': addDefTok(tok_less); break;
case '>': addDefTok(tok_more); break;
case '+': addDefTok(tok_plus); break;
case '-': addDefTok(tok_minus); break;
case '*': addDefTok(tok_asterisk); break;
case '/':
tryAddLabel();
string commentStr={source, 1};
c=*++source;
if(c=='/') { // single-line comment
commentStr.length++;
while((c=*++source)){
if(c=='\n' || c=='\r') break;
else commentStr.length++;
}
}
else if(c=='*') { // multi-line comment
commentStr.length++;
while((c=*++source)){
commentStr.length++;
if(c=='*' && *(++source)=='/') break;
}
}
else { // not comment
source--;
addDefTok(tok_slash);
break;
}
Token comTok={
.value=string_extract(commentStr),
.id=tok_comment
};
Autoarr_add(tokens, comTok);
break;
case '=': addDefTok_ifnext('=', tok_equals, tok_assign); break;
case '!': addDefTok_ifnext('=', tok_not_equals, tok_not); break;
case '&': addDefTok_ifnext('&', tok_and_d, tok_and); break;
case '|': addDefTok_ifnext('|', tok_or_d, tok_or); break;
case '?': addDefTok_ifnext('?', tok_question_d, tok_question);break;
case ':': addDefTok(tok_colon); break;
case ';': addDefTok(tok_semicolon); break;
case '.': addDefTok(tok_point); break;
case ',': addDefTok(tok_comma); break;
case '~': addDefTok(tok_tilda); break;
case '\\': addDefTok(tok_backslash); break;
case '%': addDefTok(tok_percent); break;
case '^': addDefTok(tok_xor); break;
case '$': addDefTok(tok_dollar); break;
case '@': addDefTok(tok_at); break;
default:
label.length++;
break;
}
}
return SUCCESS(UniHeapPtr(Autoarr(Token), tokens)); return SUCCESS(UniHeapPtr(Autoarr(Token), tokens));
} }
Maybe lexan(char* _source, char* _filename){
SharedLexerData sld={
._source=_source,
._filename=_filename,
._tokens=Autoarr_create(Token, 64, 1024),
._label={_source, 0},
._line={_source, 0},
._linenum=0,
._charnum=0
};
return _lexan(&sld);
}

View File

@ -3,7 +3,30 @@
#include "tokens.h" #include "tokens.h"
#include "context.h" #include "context.h"
//Autoarr(Token)* void kt_initLexerTypes();
Maybe lexan(char* source, char* filename);
void kt_initCbLexerTypes(); STRUCT(Lexer,
STNode* keywordSearchTree;
Maybe (*analize)(Lexer* lex, char* src_file_name, char* src_file_text);
char* source;
char* filename;
Autoarr(Token)* tokens;
string context;
string line;
string label;
u32 linenum;
u32 charnum;
)
Lexer* Lexer_create();
void Lexer_destroy(Lexer* lex);
///@return Maybe<Autoarr(Token)*>
///@return Maybe<Autoarr(Token)*>
Maybe Lexer_parseFile(Lexer* lex, char* src_file_name);
///@return Maybe<Autoarr(Token)*>
static inline Maybe Lexer_parseText(Lexer* lex, char* src_file_name, char* src_file_text){
return lex->analize(lex, src_file_name, src_file_text);
}

View File

@ -12,14 +12,3 @@ char* Token_toString(void* _t, u32 fmt){
kt_define(Token, Token_freeMembers, Token_toString); kt_define(Token, Token_freeMembers, Token_toString);
Autoarr_define(Token, false) Autoarr_define(Token, false)
STNode* keywordsSearchTree=NULL;
void init_keywordsSearchTree(){
keywordsSearchTree=STNode_create();
for(TokenId keywordId=0; keywordId<=tok_typeof; keywordId++){
const Token* keywordptr=&default_tokens[keywordId];
Unitype uni=UniStackPtr(Token, keywordptr);
ST_push(keywordsSearchTree, keywordptr->value, uni);
}
}

View File

@ -31,8 +31,8 @@ PACKED_ENUM(TokenId,
tok_while, tok_while,
tok_switch, tok_switch,
tok_case, tok_case,
tok_brake, tok_break,
tok_braketo, tok_breakto,
tok_return, tok_return,
tok_goto, tok_goto,
/* declaration keywords */ /* declaration keywords */
@ -72,8 +72,8 @@ PACKED_ENUM(TokenId,
tok_slash, /* / */ tok_slash, /* / */
tok_assign, /* = */ tok_assign, /* = */
tok_not, /* ! */ tok_not, /* ! */
tok_equals, /* == */ tok_equal, /* == */
tok_not_equals, /* != */ tok_not_equal, /* != */
tok_and, /* & */ tok_and, /* & */
tok_and_d, /* && */ tok_and_d, /* && */
tok_or, /* | */ tok_or, /* | */
@ -106,97 +106,92 @@ STRUCT(Token,
) )
Autoarr_declare(Token) Autoarr_declare(Token)
extern STNode* keywordsSearchTree;
// dont forget to free it
void init_keywordsSearchTree();
static const Token default_tokens[]={ static const Token default_tokens[]={
/* base types */ /* base types */
{"void", tok_void_t}, {"void", tok_void_t, 0},
{"i8", tok_i8_t}, {"i8", tok_i8_t, 0},
{"u8", tok_u8_t}, {"u8", tok_u8_t, 0},
{"i16", tok_i16_t}, {"i16", tok_i16_t, 0},
{"u16", tok_u16_t}, {"u16", tok_u16_t, 0},
{"i32", tok_i32_t}, {"i32", tok_i32_t, 0},
{"u32", tok_u32_t}, {"u32", tok_u32_t, 0},
{"i64", tok_i64_t}, {"i64", tok_i64_t, 0},
{"u64", tok_u64_t}, {"u64", tok_u64_t, 0},
{"char", tok_char_t}, {"char", tok_char_t, 0},
{"char8", tok_char8_t}, {"char8", tok_char8_t, 0},
{"char16", tok_char16_t}, {"char16", tok_char16_t, 0},
{"bool", tok_bool_t}, {"bool", tok_bool_t, 0},
{"f32", tok_f32_t}, {"f32", tok_f32_t, 0},
{"f64", tok_f64_t}, {"f64", tok_f64_t, 0},
/* constant */ /* constant */
{"null", tok_null}, {"null", tok_null, 0},
{"true", tok_true}, {"true", tok_true, 0},
{"false", tok_false}, {"false", tok_false, 0},
/* control flow operators */ /* control flow operators */
{"if", tok_if}, {"if", tok_if, 0},
{"else", tok_else}, {"else", tok_else, 0},
{"for", tok_for}, {"for", tok_for, 0},
{"while", tok_while}, {"while", tok_while, 0},
{"switch", tok_switch}, {"switch", tok_switch, 0},
{"case", tok_case}, {"case", tok_case, 0},
{"brake", tok_brake}, {"brake", tok_break, 0},
{"braketo", tok_braketo}, {"braketo", tok_breakto, 0},
{"return", tok_return}, {"return", tok_return, 0},
{"goto", tok_goto}, {"goto", tok_goto, 0},
/* declaration keywords */ /* declaration keywords */
{"class", tok_class}, {"class", tok_class, 0},
{"struct", tok_struct}, {"struct", tok_struct, 0},
{"enum", tok_enum}, {"enum", tok_enum, 0},
{"union", tok_union}, {"union", tok_union, 0},
{"event", tok_event}, {"event", tok_event, 0},
{"import", tok_import}, {"import", tok_import, 0},
{"alias", tok_alias}, {"alias", tok_alias, 0},
/* access modifiers */ /* access modifiers */
{"static", tok_static}, {"static", tok_static, 0},
{"const", tok_const}, {"const", tok_const, 0},
{"readonly", tok_readonly}, {"readonly", tok_readonly, 0},
{"protected", tok_protected}, {"protected", tok_protected, 0},
{"internal", tok_internal}, {"internal", tok_internal, 0},
{"public", tok_public}, {"public", tok_public, 0},
{"virtual", tok_virtual}, {"virtual", tok_virtual, 0},
{"override", tok_override}, {"override", tok_override, 0},
{"partial", tok_partial}, {"partial", tok_partial, 0},
/* allocation keywords */ /* allocation keywords */
{"new", tok_new}, {"new", tok_new, 0},
{"sizeof", tok_sizeof}, {"sizeof", tok_sizeof, 0},
{"typeof", tok_typeof}, {"typeof", tok_typeof, 0},
/* special characters */ /* special characters */
{"(", tok_lbracket}, {"(", tok_lbracket, 0},
{"{", tok_lbracket_fi}, {"{", tok_lbracket_fi, 0},
{"[", tok_lbracket_sq}, {"[", tok_lbracket_sq, 0},
{")", tok_rbracket}, {")", tok_rbracket, 0},
{"}", tok_rbracket_fi}, {"}", tok_rbracket_fi, 0},
{"]", tok_rbracket_sq}, {"]", tok_rbracket_sq, 0},
{"<", tok_less}, {"<", tok_less, 0},
{">", tok_more}, {">", tok_more, 0},
{"+", tok_plus}, {"+", tok_plus, 0},
{"-", tok_minus}, {"-", tok_minus, 0},
{"*", tok_asterisk}, {"*", tok_asterisk, 0},
{"/", tok_slash}, {"/", tok_slash, 0},
{"=", tok_assign}, {"=", tok_assign, 0},
{"!", tok_not}, {"!", tok_not, 0},
{"==", tok_equals}, {"==", tok_equal, 0},
{"!=", tok_not_equals}, {"!=", tok_not_equal, 0},
{"&", tok_and}, {"&", tok_and, 0},
{"&&", tok_and_d}, {"&&", tok_and_d, 0},
{"|", tok_or}, {"|", tok_or, 0},
{"||", tok_or_d}, {"||", tok_or_d, 0},
{"?", tok_question}, {"?", tok_question, 0},
{"??", tok_question_d}, {"??", tok_question_d, 0},
{":", tok_colon}, {":", tok_colon, 0},
{";", tok_semicolon}, {";", tok_semicolon, 0},
{".", tok_point}, {".", tok_point, 0},
{",", tok_comma}, {",", tok_comma, 0},
{"~", tok_tilda}, {"~", tok_tilda, 0},
{"\\", tok_backslash}, {"\\", tok_backslash, 0},
{"%", tok_percent}, {"%", tok_percent, 0},
{"^", tok_xor}, {"^", tok_xor, 0},
{"#", tok_lattice}, {"#", tok_lattice, 0},
{"$", tok_dollar}, {"$", tok_dollar, 0},
{"@", tok_at} {"@", tok_at, 0}
}; };