source generation
This commit is contained in:
parent 832b0ced23
commit 0cf8a4e00e
2 kerep
@@ -1 +1 @@
Subproject commit a0458f6d2ef04ce30f5c951281cd7de91bd48cff
Subproject commit 2a9214fb795e97aa754ac46346310f94b7e61bfd
@@ -1,25 +1,80 @@
#include "cb2c.h"
#include "../../kerep/src/String/StringBuilder.h"

void namespaceToC(NamespaceContext* context, StringBuilder* b){
#define addc(C) StringBuilder_append_char(b, C);
#define adds(S) StringBuilder_append_cptr(b, S);

}
void appendToken(StringBuilder* b, Token tok, u16* _tab_count){
u16 tab_count=*_tab_count;
adds(tok.value)

void functionToC(FunctionContext* context, StringBuilder* b){

}

void classToC(ClassContext* context, StringBuilder* b){

}

char* contextToC(void* context){
StringBuilder* b=StringBuilder_create();
switch(((Context*)context)->type){
case CT_Namespace: namespaceToC(context, b); break;
case CT_Function: functionToC(context, b); break;
case CT_Class: classToC(context, b); break;
default: throw(ERR_WRONGTYPE);
switch(tok.id){
case tok_lbracket_fi:
tab_count++;
goto add_new_line;
case tok_rbracket_fi:
tab_count--;
goto add_new_line;
case tok_semicolon:
case tok_colon:
case tok_comment: ;
add_new_line:
addc('\n');
for(u16 i=0; i<tab_count; i++)
addc('\t');
break;
case tok_plus: case tok_minus: case tok_asterisk: case tok_slash:
case tok_assign: case tok_equal: case tok_not: case tok_not_equal:
case tok_and: case tok_and_d: case tok_or: case tok_or_d:
case tok_less: case tok_more: case tok_percent:
case tok_for: case tok_if: case tok_while: case tok_switch:
case tok_lbracket: case tok_rbracket: case tok_lbracket_sq: case tok_rbracket_sq:
case tok_break: case tok_backslash:
break;
default:
addc(' ');
}
return StringBuilder_build(b).ptr;

*_tab_count=tab_count;
}

Maybe appendNamespaceContext(StringBuilder* b, NamespaceContext* context){
adds("/* context ")
if(context->base.namespace!=NULL){
adds(context->base.namespace)
addc('.')
}
adds(context->base.name)
adds(" */\n\n")

u16 tab_count=0;
Autoarr_foreach(context->base.tokens, tok, appendToken(b, tok, &tab_count));
addc('\n');

return MaybeNull;
}

Maybe appendClassContext(StringBuilder* b, ClassContext* context){
safethrow(ERR_NOTIMPLEMENTED,;);
}

Maybe appendFunctionContext(StringBuilder* b, FunctionContext* context){
safethrow(ERR_NOTIMPLEMENTED,;);
}

Maybe translateToC(Context* context){
StringBuilder* b=StringBuilder_create();
switch(context->type){
case ContextType_Namespace:
try(appendNamespaceContext(b, (NamespaceContext*)context), _0, StringBuilder_free(b));
break;
case ContextType_Class:
try(appendClassContext(b, (ClassContext*)context), _2, StringBuilder_free(b));
break;
case ContextType_Function:
try(appendFunctionContext(b, (FunctionContext*)context), _1, StringBuilder_free(b));
break;
default: safethrow(ERR_WRONGTYPE, StringBuilder_free(b));
}
return SUCCESS(UniHeapPtr(char, StringBuilder_build(b).ptr));
}

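The new appendToken works as a one-pass pretty-printer: it prints the token text, bumps a tab counter on `{` (tok_lbracket_fi), drops it on `}` (tok_rbracket_fi), and emits a newline plus one tab per open scope after braces, semicolons, colons and comments. A minimal stand-alone sketch of the same technique, using plain characters instead of kerep tokens (emit_pretty is a hypothetical name, not part of this commit):

#include <stdio.h>

/* '{' indents, '}' outdents, ';' ends the line; everything is emitted
   in a single pass, mirroring the tab_count / goto add_new_line logic above */
void emit_pretty(const char* toks, FILE* out){
    unsigned tabs=0;
    for(const char* p=toks; *p; p++){
        fputc(*p, out);
        switch(*p){
            case '{': tabs++; goto new_line;
            case '}': if(tabs) tabs--; goto new_line;
            case ';':
            new_line:
                fputc('\n', out);
                for(unsigned i=0; i<tabs; i++) fputc('\t', out);
                break;
            default: break;
        }
    }
}

int main(void){
    emit_pretty("int f(){if(x){y;}z;}", stdout);
    return 0;
}
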
@@ -2,4 +2,7 @@

#include "../lexer/lexer.h"

char* contextToC(void* context);
/// @brief generates C source code from tokens
/// @param context contains code tokens
/// @return Maybe<char*> C source code
Maybe translateToC(Context* context);

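As the doc comment says, translateToC returns Maybe<char*>. For reference, this is how the new main.c consumes it; a condensed excerpt (names shortened; error handling follows the kerep tryLast convention, where value.VoidPtr carries the payload):

NamespaceContext file_context={ .base={
    .name="example", .namespace=NULL, .parent=NULL,
    .type=ContextType_Namespace, .tokens=tokens
}};
tryLast(translateToC(&file_context.base), m_c_code)  // returns early on error
char* generated_code=m_c_code.value.VoidPtr;         // the Maybe<char*> payload
kprintf("%s", generated_code);
free(generated_code);
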
158 src/cb2c/main.c
@@ -3,55 +3,151 @@
#include "cb2c.h"
#include "../lexer/lexer.h"

Autoarr(Token)* parseFile(char* filename){
tryLast(file_open(filename, FileOpenMode_Read), m_srcfile)
File* srcfile=m_srcfile.value.VoidPtr;
char* src;
tryLast(file_readAll(srcfile, &src), m_srcLen)
u64 srcLen=m_srcLen.value.UInt64;
kprintf("srclen: %lu\n", srcLen);
src[srcLen]='\0';
tryLast(lexan(src, filename), m_tokens)
Autoarr(Token)* tokens=m_tokens.value.VoidPtr;
return tokens;
}
char cb2c_version[]="0.0.1";

int main(const int argc, const char* const* argv){
if(!setlocale(LC_ALL, "C.UTF8"))
kprintf("\e[93msetlocale failed\n");
i32 main(const int argc, const char* const* argv){
if(setlocale(LC_CTYPE, "C.UTF-8")!=0)
kprintf("\e[93msetlocale failed\e[37m\n");

bool compile=0, translate=0;
char *compiler=NULL, *tranlation_out_dir=NULL;
Autoarr(Pointer)* source_files=Autoarr_create(Pointer, 64, 64);
bool compile=false;
char* compiler=NULL;
char* compilation_out_dir=NULL;
bool translate=false;
char* translation_traget=NULL; // single .c + single .h; namespace structured project
char* translation_out_dir=NULL;

// command arguments
if(argc==1){
kprintf("\e[91mno arguments\e[37m\n");
return -1;
}
for(int argi=1; argi<argc; argi++){
char* a=argv[argi];
kprintf("%s ", a);
kprintf("%s\n", a);

if(cptr_compare(a, "-v") || cptr_compare(a, "--version")){
kprintf(" cb2c v%s\n", cb2c_version);
return 0;
}

if(cptr_compare(a, "-c") || cptr_compare(a, "--compiler")){
compile=1;
compile=true;
kprintf(" compile=true\n");

if(argi+1>=argc || argv[argi+1][0]=='-')
throw("after argument --compiler must be compiler name");

compiler=argv[++argi];
kprintf(" compiler=%s\n", compiler);
}
else if(cptr_compare(a, "-t") || cptr_compare(a, "--tranlation-out-dir")){
translate=1;

else if(cptr_compare(a, "-t") || cptr_compare(a, "--translate")){
translate=true;
kprintf(" translate=true\n");

if(argi+1>=argc || argv[argi+1][0]=='-')
throw("after argument --translation-out-dir must be directory path");
tranlation_out_dir=argv[argi+1];
if(!dir_exists(tranlation_out_dir))
dir_create(tranlation_out_dir);

translation_traget=argv[++argi];
kprintf(" translation_traget=%s\n",translation_traget);
}

else if(cptr_compare(a, "-to") || cptr_compare(a, "--tranlation-out-dir")){
if(argi+1>=argc || argv[argi+1][0]=='-')
throw("after argument --translation-out-dir must be directory path");

translation_out_dir=argv[++argi];
kprintf(" translation_out_dir=%s\n", translation_out_dir);

if(!dir_exists(translation_out_dir)){
dir_create(translation_out_dir);
kprintf(" directory was created\n");
}
else kprintf(" directory exists\n");
}

else if(cptr_compare(a, "-o") || cptr_compare(a, "--compilation-out-dir")){
if(argi+1>=argc || argv[argi+1][0]=='-')
throw("after argument --compilation-out-dir must be directory path");

compilation_out_dir=argv[++argi];
kprintf(" compilation_out_dir=%s\n", compilation_out_dir);

if(!dir_exists(compilation_out_dir)){
dir_create(compilation_out_dir);
kprintf(" directory was created\n");
}
else kprintf(" directory exists\n");
}

else {
if(a[0]=='-'){
kprintf(" \e[91minvalid argument: %s\e[37m\n", a);
return -2;
}
if(file_exists(a)){
Autoarr_add(source_files, a);
kprintf(" found source file: %s\n", a);
}
else {
kprintf(" \e[91mfile not found: %s\e[37m\n", a);
return -3;
}
}
kprintf("\n"); return 0;
}

if(!compile && !translate){
kprintf("\e[91moperation not specified: select --translate and/or --compile\e[37m\n");
return -4;
}
if(Autoarr_length(source_files)==0){
kprintf("\e[91mno source files specified\e[37m\n");
return -5;
}

// kerep type system
kt_beginInit();
kt_initKerepTypes();
kt_initCbLexerTypes();
kt_initLexerTypes();
kt_endInit();
// keywords search tree
init_keywordsSearchTree();
kprint(kp_s|kp_fgGray, "keywordsSearchTree: ", kp_h|kp_pre, keywordsSearchTree, kp_c, '\n');
Autoarr(Token)* tokens=parseFile("src.cb");
kprint_setColor(kp_fgGray);

Lexer* lexer=Lexer_create();

Autoarr_foreach(source_files, src_file_name, ({
tryLast(Lexer_parseFile(lexer, src_file_name), m_tokens)
Autoarr(Token)* tokens=m_tokens.value.VoidPtr;
kprintf("tokens: %u\n", Autoarr_length(tokens));
Autoarr_foreach(tokens, tok, kprintf("%u %s\n",tok.id, tok.value));
STNode_free(keywordsSearchTree);
NamespaceContext file_context={
.base={
.name=path_basename(src_file_name, false),
.namespace=NULL,
.parent=NULL,
.type=ContextType_Namespace,
.tokens=tokens
}
};
tryLast(translateToC(&file_context.base), m_c_code)
char* generated_code=m_c_code.value.VoidPtr;
kprintf("%s", generated_code);

char* generated_file_name;
if(file_context.base.namespace!=NULL)
generated_file_name=cptr_concat(file_context.base.namespace,".",file_context.base.name,".g.c");
else generated_file_name=cptr_concat(file_context.base.name,".g.c");
tryLast(file_open(generated_file_name, FileOpenMode_Write), m_generated_file)
File* generated_file=m_generated_file.value.VoidPtr;
kprintf("created file %s\n", generated_file_name);
tryLast(file_writeCptr(generated_file, generated_code),_m_1885);
tryLast(file_close(generated_file),_m_14415);
kprintf("source code has been written to the file\n");
free(generated_code);
free(generated_file_name);
}));

Autoarr_free(source_files, true);
Lexer_destroy(lexer);
return 0;
}
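
Every value-taking flag above uses the same guard: peek at argv[argi+1], reject a missing value or another dash-prefixed flag, then consume it with argv[++argi]. A small portable sketch of that pattern in isolation (take_value is a hypothetical helper, not part of this commit):

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

/* consume the argument following a flag, rejecting absent values and other flags */
static const char* take_value(int argc, char** argv, int* argi, const char* flag){
    if(*argi+1>=argc || argv[*argi+1][0]=='-'){
        fprintf(stderr, "after argument %s must be a value\n", flag);
        exit(1);
    }
    return argv[++*argi];
}

int main(int argc, char** argv){
    const char* out_dir=NULL;
    for(int argi=1; argi<argc; argi++)
        if(!strcmp(argv[argi], "-to") || !strcmp(argv[argi], "--translation-out-dir"))
            out_dir=take_value(argc, argv, &argi, argv[argi]);
    if(out_dir) printf("out dir: %s\n", out_dir);
    return 0;
}
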
184 src/lexer/analize.c Normal file
@@ -0,0 +1,184 @@
#include "lexer.h"

Maybe _throw_wrongchar(Lexer* lex){
char* _errline=string_extract(lex->line);
char* _context=string_extract(lex->context);
printf("\n\e[91mwrong char <%c> at [%s:%u:%u %s]\n >>> %s\n",
lex->source[lex->charnum], lex->filename,lex->linenum,lex->charnum,_context, _errline);
exit(96);
}
#define throw_wrongchar(freeMem) { freeMem; return _throw_wrongchar(lex); }

// adds <lex->label> to <lex->tokens> as tok_label or tok_number
void _tryAddLabel(Lexer* lex){
if(lex->label.length==0) return;

Unitype uni=ST_pullString(lex->keywordSearchTree, lex->label);
if(uni.VoidPtr!=NULL) // built-in keyword
Autoarr_add(lex->tokens, *(Token*)uni.VoidPtr);
else { // user-defined lex->label
Token ut;
ut.value=string_extract(lex->label);
ut.on_heap=true;
switch(*lex->label.ptr){
case '0': case '1': case '2': case '3': case '4':
case '5': case '6': case '7': case '8': case '9':
ut.id=tok_number;
break;
default:
ut.id=tok_label;
break;
}
Autoarr_add(lex->tokens, ut);
}

lex->label=(string){lex->source, 0};
};
#define tryAddLabel() _tryAddLabel(lex)

#define addDefTok(id) { tryAddLabel(); Autoarr_add(lex->tokens, default_tokens[id]); }

void _addDefTok_ifnext(Lexer* lex, char next, TokenId yes, TokenId no){
if(*(lex->source+1)==next){
lex->source++;
addDefTok(yes);
}
else addDefTok(no);
}
#define addDefTok_ifnext(nextChar, yesTok, noTok) _addDefTok_ifnext(lex, nextChar, yesTok, noTok)

// returns text in quotes or error
Maybe _readString(Lexer* lex, char quotChar){
char c;
bool prevIsBackslash=false;
char* srcFirst=lex->source;
while ((c=*++lex->source)){
if(c==quotChar) {
if(prevIsBackslash) // \"
prevIsBackslash=false;
else { // "
string str={srcFirst, lex->source-srcFirst+1};
char* extracted=string_extract(str);
return SUCCESS(UniHeapPtr(char, extracted));
}
}
else prevIsBackslash= c=='\\' && !prevIsBackslash;
}
lex->source=srcFirst;
throw_wrongchar(;);
}
#define readString(quotChar) _readString(lex, quotChar)


Maybe __Lexer_analize(Lexer* lex, char* _filename, char* _source){
lex->filename=_filename;
lex->source=_source;
lex->tokens=Autoarr_create(Token, 64, 1024);
lex->label=(string){_source, 0};
lex->line=(string){_source, 0};
lex->linenum=0;
lex->charnum=0;

char c;
lex->source--;
while ((c=*++(lex->source))) {
// tryAddLabel() resets lex->label.length, but lex->label.ptr still pointing to prev token beginning
if(lex->label.length==0)
lex->label.ptr=lex->source;

switch(c){
case ' ': case '\t':
case '\r': case '\n':
tryAddLabel();
break;

case '(': addDefTok(tok_lbracket); break;
case '{': addDefTok(tok_lbracket_fi); break;
case '[': addDefTok(tok_lbracket_sq); break;
case ')': addDefTok(tok_rbracket); break;
case '}': addDefTok(tok_rbracket_fi); break;
case ']': addDefTok(tok_rbracket_sq); break;

case '\'':
tryAddLabel();
try(readString('\''), maybeC, ;){
Token ctok={
.id=tok_character,
.value=(char*)maybeC.value.VoidPtr
};
Autoarr_add(lex->tokens, ctok);
}
break;
case '"':
tryAddLabel();
try(readString('"'), maybeS, ;){
Token stok={
.id=tok_string,
.value=(char*)maybeS.value.VoidPtr
};
Autoarr_add(lex->tokens, stok);
}
break;

case '<': addDefTok(tok_less); break;
case '>': addDefTok(tok_more); break;
case '+': addDefTok(tok_plus); break;
case '-': addDefTok(tok_minus); break;
case '*': addDefTok(tok_asterisk); break;

case '/':
tryAddLabel();
string commentStr={lex->source, 1};
c=*++lex->source;
if(c=='/') { // single-lex->line comment
commentStr.length++;
while((c=*++lex->source)){
if(c=='\n' || c=='\r') break;
else commentStr.length++;
}
}
else if(c=='*') { // multi-lex->line comment
commentStr.length++; // first slash
while((c=*++lex->source)){
commentStr.length++;
if(c=='*' && *(++lex->source)=='/') break;
}
commentStr.length++; // last slash
}
else { // not comment
lex->source--;
addDefTok(tok_slash);
break;
}
Token comTok={
.value=string_extract(commentStr),
.id=tok_comment
};
Autoarr_add(lex->tokens, comTok);
break;

case '=': addDefTok_ifnext('=', tok_equal, tok_assign); break;
case '!': addDefTok_ifnext('=', tok_not_equal, tok_not); break;
case '&': addDefTok_ifnext('&', tok_and_d, tok_and); break;
case '|': addDefTok_ifnext('|', tok_or_d, tok_or); break;
case '?': addDefTok_ifnext('?', tok_question_d, tok_question);break;
case ':': addDefTok(tok_colon); break;
case ';': addDefTok(tok_semicolon); break;
case '.': addDefTok(tok_point); break;
case ',': addDefTok(tok_comma); break;
case '~': addDefTok(tok_tilda); break;
case '\\':addDefTok(tok_backslash); break;
case '%': addDefTok(tok_percent); break;
case '^': addDefTok(tok_xor); break;
case '$': addDefTok(tok_dollar); break;
case '@': addDefTok(tok_at); break;

default:
lex->label.length++;
break;
}
}

tryAddLabel();
return SUCCESS(UniHeapPtr(Autoarr(Token), lex->tokens));
}
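
A note on the scanning strategy: the lexer never copies characters while it walks the source. lex->label is a {ptr, length} view into the source buffer; the default case only grows the length, and string_extract copies the slice out once a delimiter finalizes it. A stand-alone sketch of this zero-copy slice technique, with hypothetical names (strview, extract) standing in for kerep's string API:

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

typedef struct { const char* ptr; size_t length; } strview;  // a view, not an owner

static char* extract(strview v){        // copy the slice into a fresh C string
    char* s=malloc(v.length+1);
    memcpy(s, v.ptr, v.length);
    s[v.length]='\0';
    return s;
}

int main(void){
    const char* src="foo bar";
    strview label={src, 0};
    for(const char* p=src; ; p++){
        if(*p==' ' || *p=='\0'){
            if(label.length){                // delimiter hit: materialize the token
                char* word=extract(label);
                printf("token: %s\n", word);
                free(word);
            }
            if(!*p) break;
            label=(strview){p+1, 0};         // restart the view after the delimiter
        }
        else label.length++;                 // grow the view; no copying yet
    }
    return 0;
}
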
@@ -5,15 +5,15 @@


PACKED_ENUM(ContextType,
CT_Namespace,
CT_Class,
CT_Function
ContextType_Namespace,
ContextType_Class,
ContextType_Function
)

typedef struct Context Context;
STRUCT(Context,
char* name;
Context* parent;
char* namespace; /* nullable */
Context* parent; /* nullable */
Autoarr(Token)* tokens;
ContextType type;
)

@@ -1,6 +1,6 @@
#include "lexer.h"

void kt_initCbLexerTypes(){
void kt_initLexerTypes(){
kt_register(Token);
kt_register(Autoarr_Token);
}

@@ -1,207 +1,43 @@
#include "lexer.h"
#include "../../kerep/src/String/string.h"
#include "../../kerep/src/Filesystem/filesystem.h"

STRUCT(SharedLexerData,
char* _source;
char* _filename;
Autoarr(Token)* _tokens;
string _context;
string _line;
string _label;
u32 _linenum;
u32 _charnum;
)
Maybe __Lexer_analize(Lexer* lex, char* _filename, char* _source);

#define source sld->_source
#define filename sld->_filename
#define tokens sld->_tokens
#define context sld->_context
#define line sld->_line
#define label sld->_label
#define linenum sld->_linenum
#define charnum sld->_charnum

Maybe _throw_wrongchar(SharedLexerData* sld){
char* errline=string_extract(line);
char* _context=string_extract(context);
printf("\n\e[91mwrong char <%c> at [%s:%u:%u %s]\n >>> %s\n",
source[charnum], filename,linenum,charnum,_context, errline);
exit(96);
Lexer* Lexer_create(){
Lexer* lex=malloc(sizeof(*lex));
*lex=(Lexer){0};
lex->analize=__Lexer_analize;
lex->keywordSearchTree=STNode_create();
for(TokenId keywordId=0; keywordId<=tok_typeof; keywordId++){
const Token* keywordptr=&default_tokens[keywordId];
Unitype uni=UniStackPtr(Token, keywordptr);
ST_push(lex->keywordSearchTree, keywordptr->value, uni);
}
return lex;
}
#define throw_wrongchar(freeMem) { freeMem; return _throw_wrongchar(sld); }

// adds <label> to <tokens> as tok_label or tok_number
void _tryAddLabel(SharedLexerData* sld){
if(label.length==0) return;

Unitype uni=ST_pullString(keywordsSearchTree, label);
if(uni.VoidPtr!=NULL) // built-in keyword
Autoarr_add(tokens, *(Token*)uni.VoidPtr);
else { // user-defined label
Token ut;
ut.value=string_extract(label);
switch(*label.ptr){
case '0': case '1': case '2': case '3': case '4':
case '5': case '6': case '7': case '8': case '9':
ut.id=tok_number;
break;
default:
ut.id=tok_label;
break;
}
Autoarr_add(tokens, ut);
}

label=(string){source, 0};
};
#define tryAddLabel() _tryAddLabel(sld)

#define addDefTok(id) tryAddLabel(); Autoarr_add(tokens, default_tokens[id])

void _addDefTok_ifnext(char next, TokenId yes, TokenId no, SharedLexerData* sld){
if(*(source+1)==next){
source++;
addDefTok(yes);
}
else addDefTok(no);
void Lexer_freeMembers(void* _l){
Lexer* lex=(Lexer*)_l;
STNode_free(lex->keywordSearchTree);
}
#define addDefTok_ifnext(nextChar, yesTok, noTok) _addDefTok_ifnext(nextChar, yesTok, noTok, sld)

// returns text in quotes or error
Maybe _readString(char quotChar, SharedLexerData* sld){
char c;
bool prevIsBackslash=false;
char* srcFirst=source;
while ((c=*++source)){
if(c==quotChar) {
if(prevIsBackslash) // \"
prevIsBackslash=false;
else { // "
string str={srcFirst, source-srcFirst+1};
char* extracted=string_extract(str);
return SUCCESS(UniHeapPtr(char, extracted));
}
}
else prevIsBackslash= c=='\\' && !prevIsBackslash;
}
source=srcFirst;
throw_wrongchar(;);
kt_define(Lexer, Lexer_freeMembers, NULL)

void Lexer_destroy(Lexer* lex){
Lexer_freeMembers(lex);
free(lex);
}
#define readString(quotChar) _readString(quotChar, sld)


Maybe _lexan(SharedLexerData* sld){
char c;
source--;
while ((c=*++source)) {
// tryAddLabel() resets label.length, but label.ptr still pointing to prev token beginning
if(label.length==0)
label.ptr=source;

switch(c){
case ' ': case '\t':
case '\r': case '\n':
tryAddLabel();
break;

case '(': addDefTok(tok_lbracket); break;
case '{': addDefTok(tok_lbracket_fi); break;
case '[': addDefTok(tok_lbracket_sq); break;
case ')': addDefTok(tok_rbracket); break;
case '}': addDefTok(tok_rbracket_fi); break;
case ']': addDefTok(tok_rbracket_sq); break;

case '\'':
tryAddLabel();
try(readString('\''), maybeC, ;){
Token ctok={
.id=tok_character,
.value=(char*)maybeC.value.VoidPtr
};
Autoarr_add(tokens, ctok);
}
break;
case '"':
tryAddLabel();
try(readString('"'), maybeS, ;){
Token stok={
.id=tok_string,
.value=(char*)maybeS.value.VoidPtr
};
Autoarr_add(tokens, stok);
}
break;

case '<': addDefTok(tok_less); break;
case '>': addDefTok(tok_more); break;
case '+': addDefTok(tok_plus); break;
case '-': addDefTok(tok_minus); break;
case '*': addDefTok(tok_asterisk); break;

case '/':
tryAddLabel();
string commentStr={source, 1};
c=*++source;
if(c=='/') { // single-line comment
commentStr.length++;
while((c=*++source)){
if(c=='\n' || c=='\r') break;
else commentStr.length++;
}
}
else if(c=='*') { // multi-line comment
commentStr.length++;
while((c=*++source)){
commentStr.length++;
if(c=='*' && *(++source)=='/') break;
}
}
else { // not comment
source--;
addDefTok(tok_slash);
break;
}
Token comTok={
.value=string_extract(commentStr),
.id=tok_comment
};
Autoarr_add(tokens, comTok);
break;

case '=': addDefTok_ifnext('=', tok_equals, tok_assign); break;
case '!': addDefTok_ifnext('=', tok_not_equals, tok_not); break;
case '&': addDefTok_ifnext('&', tok_and_d, tok_and); break;
case '|': addDefTok_ifnext('|', tok_or_d, tok_or); break;
case '?': addDefTok_ifnext('?', tok_question_d, tok_question);break;
case ':': addDefTok(tok_colon); break;
case ';': addDefTok(tok_semicolon); break;
case '.': addDefTok(tok_point); break;
case ',': addDefTok(tok_comma); break;
case '~': addDefTok(tok_tilda); break;
case '\\': addDefTok(tok_backslash); break;
case '%': addDefTok(tok_percent); break;
case '^': addDefTok(tok_xor); break;
case '$': addDefTok(tok_dollar); break;
case '@': addDefTok(tok_at); break;

default:
label.length++;
break;
}
}
///@return Maybe<Autoarr(Token)*>
Maybe Lexer_parseFile(Lexer* lex, char* src_file_name){
try(file_open(src_file_name, FileOpenMode_Read), m_src_file,;)
File* src_file=m_src_file.value.VoidPtr;
char* src_text;
try(file_readAll(src_file, &src_text), m_src_len, file_close(src_file))
u64 src_len=m_src_len.value.UInt64;
kprintf("srclen: %lu\n", src_len);
try(Lexer_parseText(lex, src_file_name, src_text), m_tokens, file_close(src_file))
Autoarr(Token)* tokens=m_tokens.value.VoidPtr;
return SUCCESS(UniHeapPtr(Autoarr(Token), tokens));
}


Maybe lexan(char* _source, char* _filename){
SharedLexerData sld={
._source=_source,
._filename=_filename,
._tokens=Autoarr_create(Token, 64, 1024),
._label={_source, 0},
._line={_source, 0},
._linenum=0,
._charnum=0
};
return _lexan(&sld);
}
@@ -3,7 +3,30 @@
#include "tokens.h"
#include "context.h"

//Autoarr(Token)*
Maybe lexan(char* source, char* filename);
void kt_initLexerTypes();

void kt_initCbLexerTypes();
STRUCT(Lexer,
STNode* keywordSearchTree;
Maybe (*analize)(Lexer* lex, char* src_file_name, char* src_file_text);

char* source;
char* filename;
Autoarr(Token)* tokens;
string context;
string line;
string label;
u32 linenum;
u32 charnum;
)

Lexer* Lexer_create();
void Lexer_destroy(Lexer* lex);
///@return Maybe<Autoarr(Token)*>

///@return Maybe<Autoarr(Token)*>
Maybe Lexer_parseFile(Lexer* lex, char* src_file_name);

///@return Maybe<Autoarr(Token)*>
static inline Maybe Lexer_parseText(Lexer* lex, char* src_file_name, char* src_file_text){
return lex->analize(lex, src_file_name, src_file_text);
}

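For reference, the lifecycle this header implies (create once, parse any number of files, destroy) is exactly what the new main.c does; a condensed excerpt under the kerep tryLast convention:

Lexer* lexer=Lexer_create();                        // builds the per-lexer keyword tree
tryLast(Lexer_parseFile(lexer, "src.cb"), m_tokens) // Maybe<Autoarr(Token)*>
Autoarr(Token)* tokens=m_tokens.value.VoidPtr;
kprintf("tokens: %u\n", Autoarr_length(tokens));
Lexer_destroy(lexer);                               // frees the tree, then the lexer
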
@@ -12,14 +12,3 @@ char* Token_toString(void* _t, u32 fmt){

kt_define(Token, Token_freeMembers, Token_toString);
Autoarr_define(Token, false)


STNode* keywordsSearchTree=NULL;
void init_keywordsSearchTree(){
keywordsSearchTree=STNode_create();
for(TokenId keywordId=0; keywordId<=tok_typeof; keywordId++){
const Token* keywordptr=&default_tokens[keywordId];
Unitype uni=UniStackPtr(Token, keywordptr);
ST_push(keywordsSearchTree, keywordptr->value, uni);
}
}

@@ -31,8 +31,8 @@ PACKED_ENUM(TokenId,
tok_while,
tok_switch,
tok_case,
tok_brake,
tok_braketo,
tok_break,
tok_breakto,
tok_return,
tok_goto,
/* declaration keywords */
@@ -72,8 +72,8 @@ PACKED_ENUM(TokenId,
tok_slash, /* / */
tok_assign, /* = */
tok_not, /* ! */
tok_equals, /* == */
tok_not_equals, /* != */
tok_equal, /* == */
tok_not_equal, /* != */
tok_and, /* & */
tok_and_d, /* && */
tok_or, /* | */
@@ -106,97 +106,92 @@ STRUCT(Token,
)
Autoarr_declare(Token)

extern STNode* keywordsSearchTree;
// dont forget to free it
void init_keywordsSearchTree();


static const Token default_tokens[]={
/* base types */
{"void", tok_void_t},
{"i8", tok_i8_t},
{"u8", tok_u8_t},
{"i16", tok_i16_t},
{"u16", tok_u16_t},
{"i32", tok_i32_t},
{"u32", tok_u32_t},
{"i64", tok_i64_t},
{"u64", tok_u64_t},
{"char", tok_char_t},
{"char8", tok_char8_t},
{"char16", tok_char16_t},
{"bool", tok_bool_t},
{"f32", tok_f32_t},
{"f64", tok_f64_t},
{"void", tok_void_t, 0},
{"i8", tok_i8_t, 0},
{"u8", tok_u8_t, 0},
{"i16", tok_i16_t, 0},
{"u16", tok_u16_t, 0},
{"i32", tok_i32_t, 0},
{"u32", tok_u32_t, 0},
{"i64", tok_i64_t, 0},
{"u64", tok_u64_t, 0},
{"char", tok_char_t, 0},
{"char8", tok_char8_t, 0},
{"char16", tok_char16_t, 0},
{"bool", tok_bool_t, 0},
{"f32", tok_f32_t, 0},
{"f64", tok_f64_t, 0},
/* constant */
{"null", tok_null},
{"true", tok_true},
{"false", tok_false},
{"null", tok_null, 0},
{"true", tok_true, 0},
{"false", tok_false, 0},
/* control flow operators */
{"if", tok_if},
{"else", tok_else},
{"for", tok_for},
{"while", tok_while},
{"switch", tok_switch},
{"case", tok_case},
{"brake", tok_brake},
{"braketo", tok_braketo},
{"return", tok_return},
{"goto", tok_goto},
{"if", tok_if, 0},
{"else", tok_else, 0},
{"for", tok_for, 0},
{"while", tok_while, 0},
{"switch", tok_switch, 0},
{"case", tok_case, 0},
{"brake", tok_break, 0},
{"braketo", tok_breakto, 0},
{"return", tok_return, 0},
{"goto", tok_goto, 0},
/* declaration keywords */
{"class", tok_class},
{"struct", tok_struct},
{"enum", tok_enum},
{"union", tok_union},
{"event", tok_event},
{"import", tok_import},
{"alias", tok_alias},
{"class", tok_class, 0},
{"struct", tok_struct, 0},
{"enum", tok_enum, 0},
{"union", tok_union, 0},
{"event", tok_event, 0},
{"import", tok_import, 0},
{"alias", tok_alias, 0},
/* access modifiers */
{"static", tok_static},
{"const", tok_const},
{"readonly", tok_readonly},
{"protected", tok_protected},
{"internal", tok_internal},
{"public", tok_public},
{"virtual", tok_virtual},
{"override", tok_override},
{"partial", tok_partial},
{"static", tok_static, 0},
{"const", tok_const, 0},
{"readonly", tok_readonly, 0},
{"protected", tok_protected, 0},
{"internal", tok_internal, 0},
{"public", tok_public, 0},
{"virtual", tok_virtual, 0},
{"override", tok_override, 0},
{"partial", tok_partial, 0},
/* allocation keywords */
{"new", tok_new},
{"sizeof", tok_sizeof},
{"typeof", tok_typeof},
{"new", tok_new, 0},
{"sizeof", tok_sizeof, 0},
{"typeof", tok_typeof, 0},
/* special characters */
{"(", tok_lbracket},
{"{", tok_lbracket_fi},
{"[", tok_lbracket_sq},
{")", tok_rbracket},
{"}", tok_rbracket_fi},
{"]", tok_rbracket_sq},
{"<", tok_less},
{">", tok_more},
{"+", tok_plus},
{"-", tok_minus},
{"*", tok_asterisk},
{"/", tok_slash},
{"=", tok_assign},
{"!", tok_not},
{"==", tok_equals},
{"!=", tok_not_equals},
{"&", tok_and},
{"&&", tok_and_d},
{"|", tok_or},
{"||", tok_or_d},
{"?", tok_question},
{"??", tok_question_d},
{":", tok_colon},
{";", tok_semicolon},
{".", tok_point},
{",", tok_comma},
{"~", tok_tilda},
{"\\", tok_backslash},
{"%", tok_percent},
{"^", tok_xor},
{"#", tok_lattice},
{"$", tok_dollar},
{"@", tok_at}
{"(", tok_lbracket, 0},
{"{", tok_lbracket_fi, 0},
{"[", tok_lbracket_sq, 0},
{")", tok_rbracket, 0},
{"}", tok_rbracket_fi, 0},
{"]", tok_rbracket_sq, 0},
{"<", tok_less, 0},
{">", tok_more, 0},
{"+", tok_plus, 0},
{"-", tok_minus, 0},
{"*", tok_asterisk, 0},
{"/", tok_slash, 0},
{"=", tok_assign, 0},
{"!", tok_not, 0},
{"==", tok_equal, 0},
{"!=", tok_not_equal, 0},
{"&", tok_and, 0},
{"&&", tok_and_d, 0},
{"|", tok_or, 0},
{"||", tok_or_d, 0},
{"?", tok_question, 0},
{"??", tok_question_d, 0},
{":", tok_colon, 0},
{";", tok_semicolon, 0},
{".", tok_point, 0},
{",", tok_comma, 0},
{"~", tok_tilda, 0},
{"\\", tok_backslash, 0},
{"%", tok_percent, 0},
{"^", tok_xor, 0},
{"#", tok_lattice, 0},
{"$", tok_dollar, 0},
{"@", tok_at, 0}
};

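The trailing `, 0` added to every initializer matches the on_heap flag that analize.c sets on user-defined tokens (ut.on_heap=true). Read together, the two hunks imply roughly this Token layout; an inference from this diff, not the verbatim tokens.h definition:

STRUCT(Token,
    char* value;   /* token text; a string literal for default_tokens entries */
    TokenId id;
    bool on_heap;  /* true only when value was string_extract'ed and must be freed */
)

Keeping the flag zero for the static table lets Token_freeMembers free user tokens' heap-allocated strings without ever touching the keyword literals.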