// Listing metadata: 250 lines, 6.4 KiB, C
#include "cobek_compiler.h"
|
|
|
|
// NOTE(review): presumably emits the Autoarr implementation for Token elements
// (the lexer below stores its output in an Autoarr(Token)) - confirm against the Autoarr header.
Autoarr_define(Token);
|
|
|
|
|
|
/**
 * Translates a two-character escape sequence (a backslash followed by a
 * selector character) into the single character it denotes.
 *
 * Supported selectors: n, r, t, e (ESC, 0x1B), backslash, double quote and
 * single quote.
 *
 * @param estr string whose first two characters are the escape sequence;
 *             only estr.ptr[0] and estr.ptr[1] are read
 * @return the character denoted by the sequence
 *
 * Raises through throw() when the string is shorter than two characters,
 * does not start with a backslash, or uses an unknown selector.
 */
char charFromEscapeStr(string estr){
    // the selector lives at ptr[1], so anything shorter cannot be an escape
    if(estr.length<2) throw("escape sequence is too short");
    if(estr.ptr[0]!='\\') throw("first char is not \\");
    switch(estr.ptr[1]){
        case 'n': return '\n';
        case 'r': return '\r';
        case 't': return '\t';
        // '\e' is a compiler extension; 0x1B is the same ESC character in standard C
        case 'e': return '\x1b';
        case '\\': return '\\';
        case '"': return '"';
        case '\'': return '\'';
        default: throw(ERR_WRONGCHAR);
    }
    // NOTE(review): throw() presumably does not return; this keeps the function
    // well-defined if it ever does.
    return '\0';
}
|
|
|
|
|
|
/**
 * Splits source text into a flat list of tokens.
 *
 * Words (runs of characters that are neither whitespace nor one of the
 * punctuation characters handled below) are looked up with ST_pull_str():
 * when the lookup yields a value, the Token it points to is appended;
 * otherwise the word is appended as tok_number (first character is a digit)
 * or tok_label, carrying a copy of the word made by string_cpToCharPtr().
 * Punctuation is appended from default_tokens[]. Comments are appended as
 * tok_comment tokens and character literals as tok_char tokens, both
 * carrying a copy of their text.
 *
 * @param source   NUL-terminated text to tokenize; it is only read
 * @param filename name printed in diagnostics
 * @return token array allocated with malloc() and handed over to the caller
 *
 * A malformed character literal prints a diagnostic and terminates the
 * process with exit code 96.
 *
 * The helpers are nested functions (GNU C extension), as in the original code.
 */
Autoarr(Token)* lexan(char* source, char* filename){
    Autoarr(Token)* tokens=malloc(sizeof(Autoarr(Token)));
    if(tokens==NULL) throw("lexan: can't allocate the token array");
    *tokens=Autoarr_create(Token,64,1024);

    // start of the text, kept so diagnostics can compute line and column
    char* source_start=source;
    char c;
    // word currently being accumulated; length 0 means "no word in progress"
    string label={source,0};

    // Reports the character at `at` together with file:line:column and the
    // text of the offending line, then terminates the process.
    void throw_wrongchar(char* at){
        unsigned linenum=1;
        char* line_start=source_start;
        for(char* p=source_start; p<at; p++){
            if(*p=='\n'){
                linenum++;
                line_start=p+1;
            }
        }
        unsigned cnum=(unsigned)(at-line_start)+1;
        string line={line_start,0};
        while(line_start[line.length]!='\0'
            && line_start[line.length]!='\n'
            && line_start[line.length]!='\r')
            line.length++;
        char* errline=string_cpToCharPtr(line);
        printf("\n\x1b[91mwrong char <%c> at [%s:%u:%u]\n >>> %s\n",
            *at, filename, linenum, cnum, errline);
        exit(96);
    }

    // Flushes the word in progress (if any) to the token list.
    void addlabel(){
        if(label.length==0) return;
        Unitype token_ptr_u=ST_pull_str(label);
        if(token_ptr_u.type==Null){
            // user-defined label or number
            char first=*label.ptr;
            Token ut={
                .id=tok_label,
                .value=string_cpToCharPtr(label)
            };
            if(first>='0' && first<='9')
                ut.id=tok_number;
            Autoarr_add(tokens, ut);
        }
        // built-in keyword: the lookup result points to a ready-made Token
        else Autoarr_add(tokens, (*(Token*)token_ptr_u.VoidPtr));
        label=(string){source,0};
    }

    // Appends the predefined token for `id`.
    void addtok(TokenId id){
        Autoarr_add(tokens, default_tokens[id]);
    }

    // Appends `yes` and consumes one more character when the character
    // following the current one equals `next`; appends `no` otherwise.
    // `source` already points at the following character here.
    void addtok_ifnext(char next, TokenId yes, TokenId no){
        if(*source==next){
            addtok(yes);
            source++;
        }
        else addtok(no);
    }

    // Appends a tok_comment token carrying a copy of `text`.
    void addcomment(string text){
        Token comt={
            .id=tok_comment,
            .value=string_cpToCharPtr(text)
        };
        Autoarr_add(tokens, comt);
    }

    // Inside the loop `source` always points one past the character held in c.
    while((c=*source++)){
        switch(c){
            case ' ': case '\t':
            case '\r': case '\n':
                addlabel();
                break;
            case '(':
                addlabel();
                addtok(tok_lbracket);
                break;
            case '{':
                addlabel();
                addtok(tok_lbracket_fi);
                break;
            case '[':
                addlabel();
                addtok(tok_lbracket_sq);
                break;
            case ')':
                addlabel();
                addtok(tok_rbracket);
                break;
            case '}':
                addlabel();
                addtok(tok_rbracket_fi);
                break;
            case ']':
                addlabel();
                addtok(tok_rbracket_sq);
                break;
            case '\'': {
                addlabel();
                // empty ('') or unterminated literal
                if(*source=='\0' || *source=='\'')
                    throw_wrongchar(source);
                char ch;
                if(*source=='\\'){
                    // never step over the terminator while reading the escape
                    if(source[1]=='\0')
                        throw_wrongchar(source+1);
                    string esc={source,2};
                    ch=charFromEscapeStr(esc);
                    source+=2;
                }
                else ch=*source++;
                if(*source!='\'')
                    throw_wrongchar(source);
                source++; // closing quote
                string chstr={&ch,1};
                Token ctok={
                    .id=tok_char,
                    .value=string_cpToCharPtr(chstr)
                };
                Autoarr_add(tokens, ctok);
                break;
            }
            case '"':
                addlabel();
                // TODO: string literals are not lexed yet (the original case was empty as well);
                // the quote is dropped and the literal's content is tokenized as ordinary code.
                break;
            case '<':
                addlabel();
                addtok(tok_less);
                break;
            case '>':
                addlabel();
                addtok(tok_more);
                break;
            case '+':
                addlabel();
                addtok(tok_plus);
                break;
            case '-':
                addlabel();
                addtok(tok_minus);
                break;
            case '*':
                addlabel();
                addtok(tok_asterisk);
                break;
            case '/':
                addlabel();
                if(*source=='/'){
                    // line comment: text runs up to the end of the line
                    string text={source+1,0};
                    while(text.ptr[text.length]!='\0' && text.ptr[text.length]!='\n')
                        text.length++;
                    // leave the '\n' (or the terminator) for the main loop
                    source=text.ptr+text.length;
                    // removes \r from the end of comment line
                    if(text.length!=0 && text.ptr[text.length-1]=='\r')
                        text.length--;
                    addcomment(text);
                }
                else if(*source=='*'){
                    // block comment: text runs up to the closing "*/";
                    // an unterminated comment extends to the end of the input
                    string text={source+1,0};
                    while(text.ptr[text.length]!='\0'
                        && !(text.ptr[text.length]=='*' && text.ptr[text.length+1]=='/'))
                        text.length++;
                    source=text.ptr+text.length;
                    if(*source!='\0')
                        source+=2; // skip the closing "*/"
                    addcomment(text);
                }
                else addtok(tok_slash);
                break;
            case '=':
                addlabel();
                addtok_ifnext('=',tok_equals,tok_assign);
                break;
            case '!':
                addlabel();
                addtok_ifnext('=',tok_not_equals,tok_not);
                break;
            case '&':
                addlabel();
                // FIXME: `tok_` is the placeholder left in the original code; the real
                // token ids for "&&" and "&" are not visible from this file.
                addtok_ifnext('&',tok_,tok_);
                break;
            case '|':
                addlabel();
                // NOTE(review): "||" yields tok_or and "|" yields tok_or_d - confirm the order
                addtok_ifnext('|',tok_or,tok_or_d);
                break;
            case '?':
                addlabel();
                // NOTE(review): "??" yields tok_question and "?" yields tok_question_d - confirm the order
                addtok_ifnext('?',tok_question,tok_question_d);
                break;
            case ':':
                addlabel();
                addtok(tok_colon);
                break;
            case ';':
                addlabel();
                addtok(tok_semicolon);
                break;
            case '.':
                addlabel();
                addtok(tok_point);
                break;
            case ',':
                addlabel();
                addtok(tok_comma);
                break;
            case '~':
                addlabel();
                addtok(tok_tilda);
                break;
            case '\\':
                addlabel();
                addtok(tok_backslash);
                break;
            case '%':
                addlabel();
                addtok(tok_percent);
                break;
            case '^':
                addlabel();
                addtok(tok_xor);
                break;
            case '$':
                addlabel();
                addtok(tok_dollar);
                break;
            case '@':
                addlabel();
                addtok(tok_at);
                break;
            default:
                // a new word starts at the character just read
                if(label.length==0)
                    label.ptr=source-1;
                label.length++;
                break;
        }
    }

    // flush a word that runs up to the end of the input
    addlabel();
    return tokens;
}
|