cobek/lexer.c
2022-03-24 15:24:46 +03:00

250 lines
6.4 KiB
C

#include "cobek_compiler.h"
// Expands the project's Autoarr_define macro for the Token type.
// NOTE(review): presumably this emits the Autoarr(Token) implementation that
// lexan() below relies on — confirm against the Autoarr header.
Autoarr_define(Token);
/*
 * Decodes a two-character escape sequence into the character it denotes.
 *
 * estr.ptr must point at the backslash; the character right after it selects
 * the result. Recognised selectors: n r t e \ " '
 * estr.length is not consulted.
 *
 * A missing leading backslash is reported through throw() with a message,
 * an unrecognised selector through throw(ERR_WRONGCHAR).
 */
char charFromEscapeStr(string estr){
    const char* seq=estr.ptr;
    if(seq[0]!='\\')
        throw("first char is not \\");

    const char selector=seq[1];
    if(selector=='n')  return '\n';
    if(selector=='r')  return '\r';
    if(selector=='t')  return '\t';
    if(selector=='e')  return '\x1b'; // ESC, the same value GNU C spells '\e'
    if(selector=='\\') return '\\';
    if(selector=='"')  return '"';
    if(selector=='\'') return '\'';

    throw(ERR_WRONGCHAR);
}
/**
 * Splits source text into a flat list of tokens.
 *
 * @param source   NUL-terminated source text; it is only read, never written
 * @param filename name printed in diagnostics, not opened or read here
 * @return heap-allocated Autoarr(Token) owned by the caller. Label, number,
 *         char and comment tokens carry a value produced by
 *         string_cpToCharPtr(); keywords are copies of the Token that
 *         ST_pull_str() returns, and punctuation tokens are copies of
 *         default_tokens[] entries.
 *
 * A malformed char literal prints a diagnostic and terminates the process
 * with exit code 96. String literals are not tokenized yet (see the '"' case).
 * The helpers are GNU C nested functions because they share the scanner state
 * (source, label, line, tokens) of this function.
 */
Autoarr(Token)* lexan(char* source, char* filename){
    Autoarr(Token)* tokens=malloc(sizeof(Autoarr(Token)));
    if(tokens==NULL){
        printf("\n\e[91mlexan: out of memory\n");
        exit(EXIT_FAILURE);
    }
    *tokens=Autoarr_create(Token,64,1024);

    char c;                  // character currently being classified
    string label={source,0}; // identifier/number/keyword being accumulated
    string line={source,0};  // start of the line being scanned (for diagnostics)
    uint32 linenum=1;

    // Reports `c` as unexpected and exits. `source` must point AT the
    // offending character so that the column can be derived from it.
    void throw_wrongchar(){
        // stretch the excerpt to the end of the current line
        line.length=0;
        while(line.ptr[line.length]!='\0'
           && line.ptr[line.length]!='\n'
           && line.ptr[line.length]!='\r')
            line.length++;
        uint32 cnum=(uint32)(source-line.ptr)+1;
        char* errline=string_cpToCharPtr(line);
        printf("\n\e[91mwrong char <%c> at [%s:%u:%u]\n >>> %s\n",
            c,filename,linenum,cnum,errline);
        exit(96);
    };

    // Flushes the pending label (if any) as a keyword, number or user label.
    void addlabel(){
        if(label.length==0) return;
        Unitype token_ptr_u=ST_pull_str(label);
        // user-defined label
        if(token_ptr_u.type==Null){
            Token ut={
                .id=tok_label,
                .value=string_cpToCharPtr(label)
            };
            // anything that starts with a digit is classified as a number
            if(*label.ptr>='0' && *label.ptr<='9')
                ut.id=tok_number;
            Autoarr_add(tokens, ut);
        }
        // built-in keyword
        else Autoarr_add(tokens, (*(Token*)token_ptr_u.VoidPtr));
        // label.ptr is re-anchored by the default case when the next label starts
        label.length=0;
    };

    // Appends the predefined token for `id`.
    void addtok(TokenId id){
        Autoarr_add(tokens, default_tokens[id]);
    };

    // Appends `yes` and consumes one more character if the upcoming character
    // is `next`, otherwise appends `no`.
    void addtok_ifnext(char next, TokenId yes, TokenId no){
        // the main loop has already advanced past `c`, so *source IS the next char
        if(*source==next){
            addtok(yes);
            source++;
        }
        else addtok(no);
    };

    while((c=*source++)!='\0') switch(c){
        case '\n':
            addlabel();
            linenum++;
            line.ptr=source;
            break;
        case ' ': case '\t': case '\r':
            addlabel();
            break;
        case '(': addlabel(); addtok(tok_lbracket);    break;
        case '{': addlabel(); addtok(tok_lbracket_fi); break;
        case '[': addlabel(); addtok(tok_lbracket_sq); break;
        case ')': addlabel(); addtok(tok_rbracket);    break;
        case '}': addlabel(); addtok(tok_rbracket_fi); break;
        case ']': addlabel(); addtok(tok_rbracket_sq); break;
        case '\'': {
            addlabel();
            // source points at the first character inside the quotes
            char decoded=*source;
            if(decoded=='\\' && source[1]!='\0'){
                decoded=charFromEscapeStr((string){source,2});
                source+=2;
            }
            else if(decoded!='\0')
                source++;
            // source now sits on what has to be the closing quote
            c=*source;
            if(c!='\'')
                throw_wrongchar();
            source++;
            Token ctok={
                .id=tok_char,
                .value=string_cpToCharPtr((string){&decoded,1})
            };
            Autoarr_add(tokens, ctok);
            break;
        }
        case '"':
            // TODO: string literals are not tokenized yet; the quote only
            // terminates the pending label
            addlabel();
            break;
        case '<': addlabel(); addtok(tok_less);     break;
        case '>': addlabel(); addtok(tok_more);     break;
        case '+': addlabel(); addtok(tok_plus);     break;
        case '-': addlabel(); addtok(tok_minus);    break;
        case '*': addlabel(); addtok(tok_asterisk); break;
        case '/': {
            addlabel();
            if(*source!='/' && *source!='*'){
                addtok(tok_slash);
                break;
            }
            const char opener=*source++;
            string body={source,0};
            if(opener=='/'){
                // line comment: stop ON the newline so the main loop counts it
                while(*source!='\0' && *source!='\n'){
                    source++;
                    body.length++;
                }
                // removes \r from the end of comment line
                if(body.length>0 && body.ptr[body.length-1]=='\r')
                    body.length--;
            }
            else{
                // block comment: runs up to the closing */ or, if there is
                // none, up to the end of input
                while(*source!='\0' && !(source[0]=='*' && source[1]=='/')){
                    if(*source=='\n'){
                        linenum++;
                        line.ptr=source+1;
                    }
                    source++;
                    body.length++;
                }
                if(*source!='\0')
                    source+=2; // skip the closing */
            }
            Token comt={
                .id=tok_comment,
                .value=string_cpToCharPtr(body)
            };
            Autoarr_add(tokens,comt);
            break;
        }
        case '=': addlabel(); addtok_ifnext('=',tok_equals,tok_assign);  break;
        case '!': addlabel(); addtok_ifnext('=',tok_not_equals,tok_not); break;
        case '&':
            addlabel();
            // NOTE(review): the original had the unfinished identifier `tok_`
            // for both ids; tok_and/tok_and_d are inferred from the
            // tok_or/tok_or_d pair below — confirm against the TokenId enum
            addtok_ifnext('&',tok_and,tok_and_d);
            break;
        case '|': addlabel(); addtok_ifnext('|',tok_or,tok_or_d);             break;
        case '?': addlabel(); addtok_ifnext('?',tok_question,tok_question_d); break;
        case ':': addlabel(); addtok(tok_colon);     break;
        case ';': addlabel(); addtok(tok_semicolon); break;
        case '.': addlabel(); addtok(tok_point);     break;
        case ',': addlabel(); addtok(tok_comma);     break;
        case '~': addlabel(); addtok(tok_tilda);     break;
        case '\\': addlabel(); addtok(tok_backslash); break;
        case '%': addlabel(); addtok(tok_percent);   break;
        case '^': addlabel(); addtok(tok_xor);       break;
        case '$': addlabel(); addtok(tok_dollar);    break;
        case '@': addlabel(); addtok(tok_at);        break;
        default:
            // first character of a new label: anchor it on the character
            // just consumed, wherever the previous token happened to end
            if(label.length==0)
                label.ptr=source-1;
            label.length++;
            break;
    }
    // input may end without a trailing delimiter
    addlabel();
    return tokens;
}