data tokens lexing

This commit is contained in:
Timerix 2024-11-21 12:03:53 +05:00
parent ad232f187a
commit bd8215fd73
3 changed files with 102 additions and 12 deletions

View File

@ -162,7 +162,8 @@ static void readLabel(Compiler* cmp){
return; return;
} }
if(!isAlphabeticalLower(c) && !isAlphabeticalUpper(c) && !isDigit(c)){ if(!isAlphabeticalLower(c) && !isAlphabeticalUpper(c) && !isDigit(c) &&
c != '_' && c != '.'){
setError(Error_unexpectedCharacter(c)); setError(Error_unexpectedCharacter(c));
return; return;
} }
@ -251,6 +252,83 @@ static void readInstruction(Compiler* cmp){
List_Token_push(&cmp->tokens, tok); List_Token_push(&cmp->tokens, tok);
} }
// Lexes a single-quoted literal starting at cmp->pos and, on success,
// appends one TokenType_Char token to cmp->tokens.
//
// The token begins at the position the function was entered with and its
// length covers everything up to and including the closing quote.
// On success cmp->pos is left ON the closing quote (it is not stepped over).
//
// Failure cases (no token is pushed):
//   - a '\r' or '\n' is met before the closing quote: cmp->pos is moved back
//     by one and the character found there is reported through
//     Error_unexpectedCharacter;
//   - the input ends before the closing quote: Error_endOfFile is reported.
//
// NOTE(review): a backslash gets no special treatment, so the first '\''
// after the opening one always ends the token. The number of characters
// between the quotes is not checked here either.
static void readChar(Compiler* cmp){
    Token literal = Token_construct(TokenType_Char, cmp->pos, 0);

    // step over the first character of the literal
    // (presumably the opening quote that made lex() dispatch here)
    cmp->pos++;
    cmp->column++;

    for(; cmp->pos < cmp->code_len; cmp->pos++, cmp->column++){
        const char ch = cmp->code[cmp->pos];

        if(ch == '\''){
            // closing quote found: the token spans both quotes
            literal.length = 1 + (cmp->pos - literal.begin);
            List_Token_push(&cmp->tokens, literal);
            return;
        }

        if(ch == '\n' || ch == '\r'){
            // literal is not closed on its own line
            setError(Error_unexpectedCharacter(cmp->code[--cmp->pos]));
            return;
        }
    }

    // ran out of input while still inside the literal
    setError(Error_endOfFile);
}
// Lexes a double-quoted literal starting at cmp->pos and, on success,
// appends one TokenType_String token to cmp->tokens.
//
// The token begins at the position the function was entered with and its
// length covers everything up to and including the closing '"'.
// On success cmp->pos is left ON the closing '"' (it is not stepped over).
//
// Failure cases (no token is pushed):
//   - a '\r' or '\n' is met before the closing '"': cmp->pos is moved back
//     by one and the character found there is reported through
//     Error_unexpectedCharacter, so a string cannot span several lines;
//   - the input ends before the closing '"': Error_endOfFile is reported.
//
// NOTE(review): a backslash gets no special treatment, so the first '"'
// after the opening one always ends the token.
static void readString(Compiler* cmp){
    Token str_tok = Token_construct(TokenType_String, cmp->pos, 0);

    // step over the first character of the literal
    // (presumably the opening '"' that made lex() dispatch here)
    cmp->pos++;
    cmp->column++;

    for(; cmp->pos < cmp->code_len; cmp->pos++, cmp->column++){
        const char cur = cmp->code[cmp->pos];

        if(cur == '"'){
            // closing quote found: the token spans both quotes
            str_tok.length = 1 + (cmp->pos - str_tok.begin);
            List_Token_push(&cmp->tokens, str_tok);
            return;
        }

        if(cur == '\n' || cur == '\r'){
            // string is not closed on its own line
            setError(Error_unexpectedCharacter(cmp->code[--cmp->pos]));
            return;
        }
    }

    // ran out of input while still inside the string
    setError(Error_endOfFile);
}
// Lexes a number-like token starting at cmp->pos and appends one
// TokenType_Number token to cmp->tokens.
//
// The first character is consumed unconditionally; after that the token
// extends until one of the delimiters '\r', '\n', ' ', '\t', ',' or ';'
// or until the end of the input. The delimiter is NOT part of the token and
// cmp->pos is left on it (or at cmp->code_len when the input ended).
//
// This function never reports an error: the characters between the first
// one and the delimiter are not validated here.
static void readNumber(Compiler* cmp){
    Token number = Token_construct(TokenType_Number, cmp->pos, 0);

    // the first character always belongs to the token
    cmp->pos++;
    cmp->column++;

    // advance until a delimiter or the end of the input
    for(; cmp->pos < cmp->code_len; cmp->pos++, cmp->column++){
        const char ch = cmp->code[cmp->pos];
        if(ch == ' ' || ch == '\t' || ch == '\r' || ch == '\n' || ch == ',' || ch == ';')
            break;
    }

    // both exits (delimiter and end of input) finish the token the same way
    number.length = cmp->pos - number.begin;
    List_Token_push(&cmp->tokens, number);
}
static bool lex(Compiler* cmp){ static bool lex(Compiler* cmp){
returnErrorIf_auto(cmp->state != CompilerState_Initial); returnErrorIf_auto(cmp->state != CompilerState_Initial);
cmp->state = CompilerState_Lexing; cmp->state = CompilerState_Lexing;
@ -270,13 +348,20 @@ static bool lex(Compiler* cmp){
case '.': case '.':
readLabel(cmp); readLabel(cmp);
break; break;
case '"':
readString(cmp);
break;
case '\'':
readChar(cmp);
break;
default: default:
// try read instruction // try read instruction
if(isAlphabeticalLower(c) || isAlphabeticalUpper(c)){ if(isAlphabeticalLower(c) || isAlphabeticalUpper(c))
readInstruction(cmp); readInstruction(cmp);
break; else if(isDigit(c))
} readNumber(cmp);
else returnError(Error_unexpectedCharacter(c)); else returnError(Error_unexpectedCharacter(c));
break;
} }
if(cmp->state == CompilerState_Error) if(cmp->state == CompilerState_Error)
@ -358,7 +443,7 @@ bool Compiler_compileTasm(Compiler* cmp, cstr source_file_name, cstr out_file_na
char* tokstr = malloc(4096); char* tokstr = malloc(4096);
strncpy(tokstr, cmp->code + t.begin, t.length); strncpy(tokstr, cmp->code + t.begin, t.length);
tokstr[t.length] = 0; tokstr[t.length] = 0;
printf("[l:%2u, c:%2u] %s '%s'\n", printf("[l:%3u, c:%3u] %s '%s'\n",
pos.line, pos.column, pos.line, pos.column,
TokenType_toString(t.type), tokstr); TokenType_toString(t.type), tokstr);
free(tokstr); free(tokstr);

View File

@ -1,15 +1,19 @@
#include "token.h" #include "token.h"
static cstr TokenType_str[] = { static cstr _TokenType_str[] = {
"Unset", "Unset",
"SingleLineComment", "SingleLineComment",
"MultiLineComment", "MultiLineComment",
"Label", "Label",
"Instruction", "Instruction",
"Argument", "Argument",
"Data", "Number",
"Char",
"String",
}; };
cstr TokenType_toString(TokenType t){ cstr TokenType_toString(TokenType t){
return TokenType_str[t]; if(t >= sizeof(_TokenType_str) / sizeof(cstr))
return "!!INDEX_ERROR!!";
return _TokenType_str[t];
} }

View File

@ -8,16 +8,17 @@ typedef enum TokenType {
TokenType_Label, TokenType_Label,
TokenType_Instruction, TokenType_Instruction,
TokenType_Argument, TokenType_Argument,
TokenType_Data, TokenType_Number,
/* there is a place for 2 values left (TokenType must occupy 4 bits) */ TokenType_Char,
TokenType_String,
} TokenType; } TokenType;
cstr TokenType_toString(TokenType t); cstr TokenType_toString(TokenType t);
typedef struct Token { typedef struct Token {
u32 begin; // some index in Compiler->code u32 begin; // some index in Compiler->code
u32 length : 28; // length in characters (28 bits) u32 length : 24; // length in characters (24 bits)
TokenType type : 4; // type of token (4 bits) TokenType type : 8; // type of token (8 bits)
} Token; } Token;
#define Token_construct(TYPE, BEGIN, END) ((Token){ .type = TYPE, .begin = BEGIN, .length = END }) #define Token_construct(TYPE, BEGIN, END) ((Token){ .type = TYPE, .begin = BEGIN, .length = END })