Timerix22 2022-07-21 01:18:55 +03:00
parent ae8e5c92f9
commit 6098820644
6 changed files with 289 additions and 262 deletions

src/copler.c

@@ -1,6 +1,9 @@
 #include "copler.h"
+#include "../lexer/lexer.h"
 
 int main(){
+    init_keywordsSearchTree();
+    STNode_free(keywordsSearchTree);
     return 0;
 }
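Note: the new main() only exercises the keyword-tree lifecycle so far. A minimal sketch of how the pieces are meant to compose once the lexer result is consumed — the try/Maybe unwrapping mirrors lexer.c, while Autoarr_length, Autoarr_get and Autoarr_free are assumed kerep accessor names that this diff does not confirm:

#include "copler.h"
#include "../lexer/lexer.h"

int main(){
    init_keywordsSearchTree();                  // build the keyword tree once
    try(lexan("u32 a = 42;", "<memory>"), m, ;){ // lex an in-memory buffer
        Autoarr(Token)* toks=(Autoarr(Token)*)m.value.VoidPtr;
        for(uint32 i=0; i<Autoarr_length(toks); i++){ // assumed accessors
            Token t=Autoarr_get(toks,i);
            printf("%d %s\n", (int)t.id, t.value);
        }
        Autoarr_free(toks);                     // assumed destructor name
    }
    STNode_free(keywordsSearchTree);            // release the keyword tree
    return 0;
}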

src/lexer/lexer.c

@@ -1,8 +1,7 @@
 #include "lexer.h"
-#include "../../../kerep/src/String/string.h"
 
-Autoarr_define(Token);
-typedef struct SourceFileInfo{
+typedef struct SharedLexerData{
     char* _source;
     char* _filename;
     Autoarr(Token)* _tokens;
@@ -11,42 +10,257 @@ typedef struct SourceFileInfo{
     string _label;
     uint32 _linenum;
     uint32 _charnum;
-} SourceFileInfo;
+} SharedLexerData;
 
-#define source sfi->_source
-#define filename sfi->_filename
-#define tokens sfi->_tokens
-#define context sfi->_context
-#define line sfi->_line
-#define label sfi->_label
-#define linenum sfi->_linenum
-#define charnum sfi->_charnum
+#define source sld->_source
+#define filename sld->_filename
+#define tokens sld->_tokens
+#define context sld->_context
+#define line sld->_line
+#define label sld->_label
+#define linenum sld->_linenum
+#define charnum sld->_charnum
 
-Maybe _throw_wrongchar(SourceFileInfo* sfi){
+Maybe _throw_wrongchar(SharedLexerData* sld){
     char* errline=string_extract(line);
+    char* _context=string_extract(context);
     printf("\n\e[91mwrong char <%c> at [%s:%u:%u %s]\n >>> %s\n",
         source[charnum], filename,linenum,charnum,context, errline);
     exit(96);
 }
-#define throw_wrongchar(freeMem) { freeMem; _throw_wrongchar(sfi); }
+#define throw_wrongchar(freeMem) { freeMem; return _throw_wrongchar(sld); }
 
-char _charFromEscapeStr(string estr, SourceFileInfo* sfi){
-    if(*estr.ptr!='\\') throw("first char is not \\");
-    switch(*(++estr.ptr)){
-        case 'n': return '\n';
-        case 'r': return '\r';
-        case 't': return '\t';
-        case 'e': return '\e';
-        case '\\': return '\\';
-        case '"': return '"';
-        case '\'': return '\'';
-        default: throw_wrongchar(;);
-    }
-}
-#define charFromEscapeStr(estr) _charFromEscapeStr(estr, sfi)
+#define addTok(id) Autoarr_add(tokens, default_tokens[id])
+
+void _addTok_ifnext(char next, TokenId yes, TokenId no, SharedLexerData* sld){
+    if(*(++source)==next){
+        addTok(yes);
+    }
+    else {
+        source--;
+        addTok(no);
+    }
+}
+#define addTok_ifnext(nextChar, yesTok, noTok) _addTok_ifnext(nextChar, yesTok, noTok, sld)
+
+// adds <label> to <tokens> as tok_label or tok_number
+void _tryAddLabel(SharedLexerData* sld){
+    if(label.length==0) return;
+    Unitype fake_uni=ST_pullString(keywordsSearchTree,label);
+    if(fake_uni.VoidPtr!=NULL) // built-in keyword
+        Autoarr_add(tokens, *(Token*)(void*)&fake_uni);
+    else { // user-defined label
+        Token ut;
+        ut.value=string_extract(label);
+        switch(*label.ptr){
+            case '0': case '1': case '2': case '3': case '4':
+            case '5': case '6': case '7': case '8': case '9':
+                ut.id=tok_number;
+                break;
+            default:
+                ut.id=tok_label;
+                break;
+        }
+        Autoarr_add(tokens, ut);
+    }
+    label=(string){source,0};
+};
+#define tryAddLabel() _tryAddLabel(sld)
+
+// returns text in quotes or error
+Maybe _readString(char quotChar, SharedLexerData* sld){
+    char c;
+    bool prevIsBackslash=false;
+    char* srcFirst=source;
+    while ((c=*++source)){
+        if(c==quotChar) {
+            if(prevIsBackslash) // \"
+                prevIsBackslash=false;
+            else { // "
+                string str={srcFirst, source-srcFirst+1};
+                char* extracted=string_extract(str);
+                return SUCCESS(UniPtr(CharPtr, extracted));
+            }
+        }
+        else prevIsBackslash= c=='\\' && !prevIsBackslash;
+    }
+    source=srcFirst;
+    throw_wrongchar(;);
+}
+#define readString(quotChar) _readString(quotChar, sld)
+
+Maybe _lexan(SharedLexerData* sld){
+    char c;
+    source--;
+    while ((c=*++source)) switch(c){
+        case ' ': case '\t':
+        case '\r': case '\n':
+            tryAddLabel();
+            break;
+        case '(':
+            tryAddLabel();
+            addTok(tok_lbracket);
+            break;
+        case '{':
+            tryAddLabel();
+            addTok(tok_lbracket_fi);
+            break;
+        case '[':
+            tryAddLabel();
+            addTok(tok_lbracket_sq);
+            break;
+        case ')':
+            tryAddLabel();
+            addTok(tok_rbracket);
+            break;
+        case '}':
+            tryAddLabel();
+            addTok(tok_rbracket_fi);
+            break;
+        case ']':
+            tryAddLabel();
+            addTok(tok_rbracket_sq);
+            break;
+        case '\'':
+            tryAddLabel();
+            try(readString('\''), maybeC, ;){
+                Token ctok={
+                    .id=tok_character,
+                    .value=(char*)maybeC.value.VoidPtr
+                };
+                Autoarr_add(tokens, ctok);
+            }
+            break;
+        case '"':
+            tryAddLabel();
+            try(readString('"'), maybeS, ;){
+                Token stok={
+                    .id=tok_string,
+                    .value=(char*)maybeS.value.VoidPtr
+                };
+                Autoarr_add(tokens, stok);
+            }
+            break;
+        case '<':
+            tryAddLabel();
+            addTok(tok_less);
+            break;
+        case '>':
+            tryAddLabel();
+            addTok(tok_more);
+            break;
+        case '+':
+            tryAddLabel();
+            addTok(tok_plus);
+            break;
+        case '-':
+            tryAddLabel();
+            addTok(tok_minus);
+            break;
+        case '*':
+            tryAddLabel();
+            addTok(tok_asterisk);
+            break;
+        case '/':
+            tryAddLabel();
+            string commentStr={source,0};
+            c=*++source;
+            if(c=='/') { // single-line comment
+                while((c=*++source)){
+                    if(c=='\n' || c=='\r') break;
+                    else commentStr.length++;
+                }
+            }
+            else if(c=='*') { // multi-line comment
+                while((c=*++source)){
+                    commentStr.length++;
+                    if(c=='*' && *(++source)=='/') break;
+                }
+            }
+            else { // not comment
+                source--;
+                addTok(tok_slash);
+                break;
+            }
+            Token comTok={
+                .value=string_extract(commentStr),
+                .id=tok_comment
+            };
+            Autoarr_add(tokens,comTok);
+            break;
+        case '=':
+            tryAddLabel();
+            addTok_ifnext('=',tok_equals,tok_assign);
+            break;
+        case '!':
+            tryAddLabel();
+            addTok_ifnext('=',tok_not_equals,tok_not);
+            break;
+        case '&':
+            tryAddLabel();
+            addTok_ifnext('&',tok_and_d,tok_and);
+            break;
+        case '|':
+            tryAddLabel();
+            addTok_ifnext('|',tok_or_d,tok_or);
+            break;
+        case '?':
+            tryAddLabel();
+            addTok_ifnext('?',tok_question_d,tok_question);
+            break;
+        case ':':
+            tryAddLabel();
+            addTok(tok_colon);
+            break;
+        case ';':
+            tryAddLabel();
+            addTok(tok_semicolon);
+            break;
+        case '.':
+            tryAddLabel();
+            addTok(tok_point);
+            break;
+        case ',':
+            tryAddLabel();
+            addTok(tok_comma);
+            break;
+        case '~':
+            tryAddLabel();
+            addTok(tok_tilda);
+            break;
+        case '\\':
+            tryAddLabel();
+            addTok(tok_backslash);
+            break;
+        case '%':
+            tryAddLabel();
+            addTok(tok_percent);
+            break;
+        case '^':
+            tryAddLabel();
+            addTok(tok_xor);
+            break;
+        case '$':
+            tryAddLabel();
+            addTok(tok_dollar);
+            break;
+        case '@':
+            tryAddLabel();
+            addTok(tok_at);
+            break;
+        default:
+            label.length++;
+            break;
+    }
+    return SUCCESS(UniPtr(AutoarrTokenPtr,tokens));
+}
+
-Autoarr(Token)* lexan(char* _source, char* _filename){
-    SourceFileInfo sfi={
+Maybe lexan(char* _source, char* _filename){
+    SharedLexerData sld={
         ._source=_source,
         ._filename=_filename,
         ._tokens=Autoarr_create(Token,64,1024),
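Note on _readString above: a closing quote only ends the literal when the previous character was not an unescaped backslash, tracked by the single prevIsBackslash flag (two backslashes cancel out). The same flag logic as a self-contained, hypothetical helper, independent of SharedLexerData:

#include <stdio.h>
#include <stdbool.h>

// returns a pointer to the closing quote, or NULL if the literal never ends;
// same prevIsBackslash bookkeeping as _readString in lexer.c
static const char* find_closing_quote(const char* s, char quotChar){
    bool prevIsBackslash=false;
    for(char c; (c=*++s);){ // s points at the opening quote on entry
        if(c==quotChar && !prevIsBackslash) return s;
        prevIsBackslash = c=='\\' && !prevIsBackslash;
    }
    return NULL;
}

int main(){
    printf("%s\n", find_closing_quote("\"a\\\"b\"", '"')); // \" is escaped -> prints the final quote
    printf("%s\n", find_closing_quote("\"a\\\\\"x", '"')); // \\ cancels itself -> prints "x
    return 0;
}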
@@ -55,220 +269,5 @@ Autoarr(Token)* lexan(char* _source, char* _filename){
         ._linenum=0,
         ._charnum=0
     };
-    return _lexan(&sfi);
+    return _lexan(&sld);
 }
-
-Autoarr(Token)* _lexan(SourceFileInfo* sfi){
-    void addlabel(){
-        if(label.length==0) return;
-        Unitype token_ptr_u=ST_pull_str(label);
-        // user-defined label
-        if(token_ptr_u.type==Null){
-            Token ut={
-                .id=tok_label,
-                .value=string_cpToCharPtr(label)
-            };
-            switch(*label.ptr){
-                case '0': case '1': case '2': case '3': case '4':
-                case '5': case '6': case '7': case '8': case '9':
-                    ut.id=tok_number;
-                    break;
-                default:
-                    break;
-            }
-            Autoarr_add(tokens, ut);
-        }
-        // built-in keyword
-        else Autoarr_add(tokens, (*token_ptr_u.VoidPtr));
-        label={source,0};
-    };
-    addtok(TokenId id){
-        Autoarr_add(tokens, default_tokens[id]);
-    }
-    addtok_ifnext(char next, TokenId yes, TokenId no){
-        if(*(source+1)==next){
-            addtok(yes);
-            source++;
-        }
-        else addtok(no);
-    }
-    while ((c=*source++)) switch(c){
-        case ' ': case '\t':
-        case '\r': case '\n':
-            addlabel();
-            break;
-        case '(':
-            addlabel();
-            addtok(tok_lbracket);
-            break;
-        case '{':
-            addlabel();
-            addtok(tok_lbracket_fi);
-            break;
-        case '[':
-            addlabel();
-            addtok(tok_lbracket_sq);
-            break;
-        case ')':
-            addlabel();
-            addtok(tok_rbracket);
-            break;
-        case '}':
-            addlabel();
-            addtok(tok_rbracket_fi);
-            break;
-        case ']':
-            addlabel();
-            addtok(tok_rbracket_sq);
-            break;
-        case '\'':
-            addlabel();
-            Token ctok={
-                .id=tok_char ,
-                .value={++source,1}
-            };
-            if(*source=='\\'){
-                ctok.value=malloc(1);
-                *ctok.value=charFromEscapeStr(source++,2);
-            }
-            if(*(++source)!='\'')
-                throw_wrongchar();
-            Autoarr_add(tokens, ctok);
-            break;
-        case '"':
-            addlabel();
-            break;
-        case '<':
-            addlabel();
-            addtok(tok_less);
-            break;
-        case '>':
-            addlabel();
-            addtok(tok_more);
-            break;
-        case '+':
-            addlabel();
-            addtok(tok_plus);
-            break;
-        case '-':
-            addlabel();
-            addtok(tok_minus);
-            break;
-        case '*':
-            addlabel();
-            addtok(tok_asterisk);
-            break;
-        case '/':
-            addlabel();
-            if(*(++source)=='/'){
-                label={++source,0};
-                while((c=*(source++))
-                    if(c=='\n'){
-                        // removes \r from the end of comment line
-                        if(*(source-2)=='\r')
-                            label.length--;
-                        goto add_comment;
-                    }
-                    else label.length++;
-                add_comment:
-                Token comt={
-                    .id=tok_comment,
-                    .value=label
-                }
-                Autoarr_add(tokens,comt);
-            }
-            else if(*source=='*'){
-                label={++source,0};
-                while((c=*(source++))
-                    if(c=='*'&&*(source++)=='/')
-                        goto add_comment;
-                    else label.length++;
-                add_comment:
-                Token comt={
-                    .id=tok_comment,
-                    .value=label
-                }
-                Autoarr_add(tokens,comt);
-            }
-            else{
-                source--;
-                addtok(tok_slash);
-            }
-            break;
-        case '=':
-            addlabel();
-            addtok_ifnext('=',tok_equals,tok_assign);
-            break;
-        case '!':
-            addlabel();
-            addtok_ifnext('=',tok_not_equals,tok_not);
-            break;
-        case '&':
-            addlabel();
-            addtok_ifnext('&',tok_,tok_);
-            break;
-        case '|':
-            addlabel();
-            addtok_ifnext('|',tok_or,tok_or_d);
-            break;
-        case '?':
-            addlabel();
-            addtok_ifnext('?',tok_question,tok_question_d);
-            break;
-        case ':':
-            addlabel();
-            addtok(tok_colon);
-            break;
-        case ';':
-            addlabel();
-            addtok(tok_semicolon);
-            break;
-        case '.':
-            addlabel();
-            addtok(tok_point);
-            break;
-        case ',':
-            addlabel();
-            addtok(tok_comma);
-            break;
-        case '~':
-            addlabel();
-            addtok(tok_tilda);
-            break;
-        case '\\':
-            addlabel();
-            addtok(tok_backslash);
-            break;
-        case '%':
-            addlabel();
-            addtok(tok_percent);
-            break;
-        case '^':
-            addlabel();
-            addtok(tok_xor);
-            break;
-        case '$':
-            addlabel();
-            addtok(tok_dollar);
-            break;
-        case '@':
-            addlabel();
-            addtok(tok_at);
-            break;
-        default:
-            label.length++;
-            break;
-    }
-    return tokens;
-}
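Note on the deletion above: the old _lexan relied on GCC nested functions (addlabel, addtok and addtok_ifnext close over sfi), a non-standard extension, and its comment handling reused the add_comment label twice in one function, which does not compile. The rewrite hoists every helper to file scope and threads state through an explicit SharedLexerData* parameter, hidden again by a macro at call sites. The same refactor pattern in miniature, with hypothetical names:

#include <stdio.h>

typedef struct Ctx{ int count; } Ctx;

// before: a nested void bump(){ ctx.count++; } captured ctx implicitly (GCC-only);
// after: the context is an explicit parameter and a macro restores the short call
static void _bump(Ctx* ctx){ ctx->count++; }
#define bump() _bump(ctx)

int main(){
    Ctx c={0};
    Ctx* ctx=&c; // the macro expects a local named ctx, like sld in lexer.c
    bump();
    bump();
    printf("%d\n", c.count); // prints 2
    return 0;
}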

src/lexer/lexer.h

@@ -1,9 +1,6 @@
 #pragma once
-#include "../../../kerep/src/Autoarr/Autoarr.h"
-#include "../../../kerep/src/String/string.h"
 #include "tokens.h"
 
-Autoarr_declare(Token)
-
-Autoarr(Token)* lexan(char* source, char* filename);
+//Autoarr(Token)*
+Maybe lexan(char* source, char* filename);

src/lexer/my_type_ext.h Normal file

@@ -0,0 +1,7 @@
+#pragma once
+
+#include "../../../kerep/src/base/base.h"
+
+typedef enum lexer_type{
+    AutoarrTokenPtr=my_type_last+1
+} lexer_type;
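Note: this header extends kerep's Unitype type-tag enum past my_type_last so that lexan can return an Autoarr(Token)* inside a Maybe (see SUCCESS(UniPtr(AutoarrTokenPtr,tokens)) in lexer.c). A sketch of caller-side dispatch, under the assumption that Unitype exposes its tag as a .type field, as the deleted token_ptr_u.type==Null check suggests:

#include "../../../kerep/src/base/base.h"
#include "my_type_ext.h"
#include "tokens.h"

// hypothetical helper, not part of the diff: recover the token array
// only when the Unitype really carries the lexer's custom tag
static Autoarr(Token)* as_token_array(Unitype u){
    return u.type==AutoarrTokenPtr ? (Autoarr(Token)*)u.VoidPtr : NULL;
}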

src/lexer/tokens.c Normal file

@@ -0,0 +1,12 @@
+#include "tokens.h"
+
+Autoarr_define(Token)
+
+void init_keywordsSearchTree(){
+    keywordsSearchTree=STNode_create();
+    for(TokenId keywordId=0; keywordId<=tok_typeof; keywordId++){
+        Token keyword=default_tokens[keywordId];
+        Unitype fake_uni=*(Unitype*)(void*)&keyword;
+        ST_push(keywordsSearchTree, keyword.value, fake_uni);
+    }
+}
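Note: ST_push here and ST_pullString in lexer.c smuggle a whole Token through the tree's Unitype slot via pointer-cast punning (*(Unitype*)(void*)&keyword). That round trip is only sound while a Token fits inside a Unitype; a sketch of a compile-time guard making the layout assumption explicit (not in the diff):

#include "tokens.h"

// pin down the size assumption the Token<->Unitype punning relies on
_Static_assert(sizeof(Token)<=sizeof(Unitype),
    "Token must fit inside Unitype for the SearchTree punning");

void punning_demo(void){
    // round trip, mirroring init_keywordsSearchTree and _tryAddLabel
    Token keyword=default_tokens[tok_struct];
    Unitype fake_uni=*(Unitype*)(void*)&keyword; // Token -> Unitype
    Token back=*(Token*)(void*)&fake_uni;        // Unitype -> Token
    (void)back; // back.id==tok_struct, back.value=="struct"
}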

src/lexer/tokens.h

@@ -1,6 +1,8 @@
 #pragma once
-#include "../../../kerep/src/base/base.h"
+#include "../../../kerep/src/Autoarr/Autoarr.h"
+#include "../../../kerep/src/SearchTree/SearchTree.h"
+#include "my_type_ext.h"
 
 typedef enum TokenId{
     // base types
@@ -35,6 +37,7 @@ typedef enum TokenId{
     tok_return,
     tok_goto,
     // declaration keywords
+    tok_class,
     tok_struct,
     tok_enum,
     tok_union,
@@ -55,12 +58,6 @@ typedef enum TokenId{
     tok_new,    // allocates struct in heap
     tok_sizeof, // size of variable value
     tok_typeof, // type of variable
-    // user-defined
-    tok_label,
-    tok_number,
-    tok_character,
-    tok_string,
-    tok_comment,
     // special characters
     tok_lbracket,    // (
     tok_lbracket_fi, // {
@@ -68,8 +65,6 @@ typedef enum TokenId{
     tok_rbracket,    // )
     tok_rbracket_fi, // }
     tok_rbracket_sq, // ]
-    //tok_quot,   // '
-    //tok_quot_d, // "
     tok_less, // <
     tok_more, // >
     tok_plus, // +
@@ -96,7 +91,13 @@ typedef enum TokenId{
     tok_xor,     // ^
     tok_lattice, // #
     tok_dollar,  // $
-    tok_at       // @
+    tok_at,      // @
+    // user-defined
+    tok_label,
+    tok_number,
+    tok_character,
+    tok_string,
+    tok_comment
 } __attribute__((__packed__)) TokenId;
 
 typedef struct Token{
@@ -104,6 +105,11 @@ typedef struct Token{
     TokenId id;
 } Token;
 
+static STNode* keywordsSearchTree;
+// dont forget to free it
+void init_keywordsSearchTree();
+
 static const Token default_tokens[]={
     // base types
     {"void", tok_void_t},
@@ -137,6 +143,7 @@ static const Token default_tokens[]={
     {"return", tok_return},
     {"goto", tok_goto},
     // declaration keywords
+    {"class", tok_class},
     {"struct", tok_struct},
     {"enum", tok_enum},
     {"union", tok_union},
@@ -192,3 +199,5 @@ static const Token default_tokens[]={
     {"$", tok_dollar},
     {"@", tok_at}
 };
+
+Autoarr_declare(Token)
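Note: init_keywordsSearchTree indexes default_tokens[keywordId] for every id up to tok_typeof, so the table must stay ordered exactly like the enum over the keyword range; this commit keeps that invariant by adding {"class", tok_class} at the same position tok_class takes in the enum. A cheap guard for the invariant (sketch, not in the diff):

#include <assert.h>
#include "tokens.h"

// verify table slot i really holds the token whose id is i,
// for every id the keyword-tree construction indexes directly
void check_keyword_table(void){
    for(TokenId id=0; id<=tok_typeof; id++)
        assert(default_tokens[id].id==id);
}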