Compare commits

..

2 Commits

Author SHA1 Message Date
bd8215fd73 data tokens lexing 2024-11-21 12:03:53 +05:00
ad232f187a semicolon 2024-11-21 09:48:54 +05:00
3 changed files with 104 additions and 15 deletions

View File

@ -162,7 +162,8 @@ static void readLabel(Compiler* cmp){
return;
}
if(!isAlphabeticalLower(c) && !isAlphabeticalUpper(c) && !isDigit(c)){
if(!isAlphabeticalLower(c) && !isAlphabeticalUpper(c) && !isDigit(c) &&
c != '_' && c != '.'){
setError(Error_unexpectedCharacter(c));
return;
}
@ -187,7 +188,7 @@ static void readArguments(Compiler* cmp){
while(cmp->pos < cmp->code_len){
c = cmp->code[cmp->pos];
// end of line
if(c == '\r' || c == '\n'){
if(c == '\r' || c == '\n' || c == ';'){
tok.length = cmp->pos - tok.begin;
if(tok.length > 0)
List_Token_push(&cmp->tokens, tok);
@ -222,7 +223,7 @@ static void readInstruction(Compiler* cmp){
while(cmp->pos < cmp->code_len){
char c = cmp->code[cmp->pos];
// end of line
if(c == '\r' || c == '\n'){
if(c == '\r' || c == '\n' || c == ';'){
tok.length = cmp->pos - tok.begin;
List_Token_push(&cmp->tokens, tok);
// cmp->line will be increased in lex()
@ -251,6 +252,83 @@ static void readInstruction(Compiler* cmp){
List_Token_push(&cmp->tokens, tok);
}
// Lexes a character literal.
// Called from lex() when the character under cmp->pos is the opening `'`.
// Success: pushes a TokenType_Char token spanning both quotes and returns
//          with cmp->pos left on the closing quote.
// Line break before the closing quote: steps cmp->pos back by one and
//          reports the character found there as unexpected.
// End of input before the closing quote: reports Error_endOfFile.
// Every other character (backslash included) is accepted as-is; no escape
// sequences are interpreted and the literal's length is not limited here.
static void readChar(Compiler* cmp){
    Token literal = Token_construct(TokenType_Char, cmp->pos, 0);

    // step over the opening quote
    cmp->column++;
    cmp->pos++;

    for(; cmp->pos < cmp->code_len; cmp->pos++, cmp->column++){
        switch(cmp->code[cmp->pos]){
            // end of line: the literal was never closed
            case '\r':
            case '\n': {
                setError(Error_unexpectedCharacter(cmp->code[--cmp->pos]));
                return;
            }
            // closing quote: +1 so the token includes it
            case '\'': {
                literal.length = cmp->pos - literal.begin + 1;
                List_Token_push(&cmp->tokens, literal);
                return;
            }
            default:
                break;
        }
    }

    // end of file
    setError(Error_endOfFile);
}
// Lexes a string literal.
// Called from lex() when the character under cmp->pos is the opening `"`.
// Success: pushes a TokenType_String token spanning both quotes and returns
//          with cmp->pos left on the closing quote.
// Line break before the closing quote: steps cmp->pos back by one and
//          reports the character found there as unexpected.
// End of input before the closing quote: reports Error_endOfFile.
// A string cannot span several lines, and no escape sequences are
// interpreted here: the first `"` after the opening one ends the literal.
static void readString(Compiler* cmp){
    Token literal = Token_construct(TokenType_String, cmp->pos, 0);

    // step over the opening quote
    cmp->column++;
    cmp->pos++;

    for(;;){
        if(cmp->pos >= cmp->code_len){
            // end of file
            setError(Error_endOfFile);
            return;
        }

        char ch = cmp->code[cmp->pos];

        if(ch == '"'){
            // +1 so the token includes the closing quote
            literal.length = cmp->pos - literal.begin + 1;
            List_Token_push(&cmp->tokens, literal);
            return;
        }

        if(ch == '\n' || ch == '\r'){
            // end of line: the literal was never closed
            setError(Error_unexpectedCharacter(cmp->code[--cmp->pos]));
            return;
        }

        cmp->pos++;
        cmp->column++;
    }
}
// Lexes a number token.
// Called from lex() when the character under cmp->pos is a digit.
// The token starts at cmp->pos and runs up to (not including) the first
// space, tab, comma, semicolon or line break, or up to the end of input.
// The characters after the first one are not validated here: anything that
// is not one of those terminators becomes part of the token.
// Always pushes exactly one TokenType_Number token; on return cmp->pos is
// on the terminator (or equal to cmp->code_len).
static void readNumber(Compiler* cmp){
    Token number = Token_construct(TokenType_Number, cmp->pos, 0);

    // the first character was already checked by the caller
    cmp->column++;
    cmp->pos++;

    for(; cmp->pos < cmp->code_len; cmp->pos++, cmp->column++){
        char ch = cmp->code[cmp->pos];
        if(ch == ' ' || ch == '\t' || ch == ',' || ch == ';' || ch == '\r' || ch == '\n')
            break;
    }

    // reached a terminator or the end of file: either way the token is complete
    number.length = cmp->pos - number.begin;
    List_Token_push(&cmp->tokens, number);
}
static bool lex(Compiler* cmp){
returnErrorIf_auto(cmp->state != CompilerState_Initial);
cmp->state = CompilerState_Lexing;
@ -270,13 +348,20 @@ static bool lex(Compiler* cmp){
case '.':
readLabel(cmp);
break;
case '"':
readString(cmp);
break;
case '\'':
readChar(cmp);
break;
default:
// try read instruction
if(isAlphabeticalLower(c) || isAlphabeticalUpper(c)){
if(isAlphabeticalLower(c) || isAlphabeticalUpper(c))
readInstruction(cmp);
break;
}
else if(isDigit(c))
readNumber(cmp);
else returnError(Error_unexpectedCharacter(c));
break;
}
if(cmp->state == CompilerState_Error)
@ -358,9 +443,8 @@ bool Compiler_compileTasm(Compiler* cmp, cstr source_file_name, cstr out_file_na
char* tokstr = malloc(4096);
strncpy(tokstr, cmp->code + t.begin, t.length);
tokstr[t.length] = 0;
printf("[l:%u, c:%u](pos:%u, size:%u) %s '%s'\n",
printf("[l:%3u, c:%3u] %s '%s'\n",
pos.line, pos.column,
t.begin, t.length,
TokenType_toString(t.type), tokstr);
free(tokstr);
}

View File

@ -1,15 +1,19 @@
#include "token.h"
static cstr TokenType_str[] = {
static cstr _TokenType_str[] = {
"Unset",
"SingleLineComment",
"MultiLineComment",
"Label",
"Instruction",
"Argument",
"Data",
"Number",
"Char",
"String",
};
cstr TokenType_toString(TokenType t){
return TokenType_str[t];
if(t >= sizeof(_TokenType_str) / sizeof(cstr))
return "!!INDEX_ERROR!!";
return _TokenType_str[t];
}

View File

@ -8,16 +8,17 @@ typedef enum TokenType {
TokenType_Label,
TokenType_Instruction,
TokenType_Argument,
TokenType_Data,
/* there is a place for 2 values left (TokenType must occupy 4 bits) */
TokenType_Number,
TokenType_Char,
TokenType_String,
} TokenType;
cstr TokenType_toString(TokenType t);
typedef struct Token {
u32 begin; // some index in Compiler->code
u32 length : 28; // length in characters (28 bits)
TokenType type : 4; // type of token (4 bits)
u32 length : 24; // length in characters (24 bits)
TokenType type : 8; // type of token (8 bits)
} Token;
// Builds a Token value (compound literal) from its three fields.
//   TYPE  - value for the `type` field
//   BEGIN - value for the `begin` field
//   END   - NOTE: despite the parameter name, this value is stored in the
//           `length` field, not as an end index
#define Token_construct(TYPE, BEGIN, END) ((Token){ .type = TYPE, .begin = BEGIN, .length = END })