diff --git a/src/compiler/compiler.c b/src/compiler/compiler.c index f636241..c4130f9 100644 --- a/src/compiler/compiler.c +++ b/src/compiler/compiler.c @@ -162,7 +162,8 @@ static void readLabel(Compiler* cmp){ return; } - if(!isAlphabeticalLower(c) && !isAlphabeticalUpper(c) && !isDigit(c)){ + if(!isAlphabeticalLower(c) && !isAlphabeticalUpper(c) && !isDigit(c) && + c != '_' && c != '.'){ setError(Error_unexpectedCharacter(c)); return; } @@ -251,6 +252,83 @@ static void readInstruction(Compiler* cmp){ List_Token_push(&cmp->tokens, tok); } +static void readChar(Compiler* cmp){ + Token tok = Token_construct(TokenType_Char, cmp->pos, 0); + cmp->pos++; + cmp->column++; + + while(cmp->pos < cmp->code_len){ + char c = cmp->code[cmp->pos]; + // end of line + if(c == '\r' || c == '\n'){ + setError(Error_unexpectedCharacter(cmp->code[--cmp->pos])); + return; + } + + if(c == '\''){ + tok.length = cmp->pos - tok.begin + 1; + List_Token_push(&cmp->tokens, tok); + return; + } + + cmp->column++; + cmp->pos++; + } + + // end of file + setError(Error_endOfFile); +} + +static void readString(Compiler* cmp){ + Token tok = Token_construct(TokenType_String, cmp->pos, 0); + cmp->pos++; + cmp->column++; + + while(cmp->pos < cmp->code_len){ + char c = cmp->code[cmp->pos]; + // end of line + if(c == '\r' || c == '\n'){ + setError(Error_unexpectedCharacter(cmp->code[--cmp->pos])); + return; + } + + if(c == '"'){ + tok.length = cmp->pos - tok.begin + 1; + List_Token_push(&cmp->tokens, tok); + return; + } + + cmp->column++; + cmp->pos++; + } + + // end of file + setError(Error_endOfFile); +} + +static void readNumber(Compiler* cmp){ + Token tok = Token_construct(TokenType_Number, cmp->pos, 0); + cmp->pos++; + cmp->column++; + + while(cmp->pos < cmp->code_len){ + char c = cmp->code[cmp->pos]; + + if(c == '\r' || c == '\n' || c == ' ' || c == '\t' || c == ',' || c == ';'){ + tok.length = cmp->pos - tok.begin; + List_Token_push(&cmp->tokens, tok); + return; + } + + cmp->column++; + cmp->pos++; + } + + // end of file + tok.length = cmp->pos - tok.begin; + List_Token_push(&cmp->tokens, tok); +} + static bool lex(Compiler* cmp){ returnErrorIf_auto(cmp->state != CompilerState_Initial); cmp->state = CompilerState_Lexing; @@ -270,13 +348,20 @@ static bool lex(Compiler* cmp){ case '.': readLabel(cmp); break; + case '"': + readString(cmp); + break; + case '\'': + readChar(cmp); + break; default: // try read instruction - if(isAlphabeticalLower(c) || isAlphabeticalUpper(c)){ + if(isAlphabeticalLower(c) || isAlphabeticalUpper(c)) readInstruction(cmp); - break; - } + else if(isDigit(c)) + readNumber(cmp); else returnError(Error_unexpectedCharacter(c)); + break; } if(cmp->state == CompilerState_Error) @@ -358,7 +443,7 @@ bool Compiler_compileTasm(Compiler* cmp, cstr source_file_name, cstr out_file_na char* tokstr = malloc(4096); strncpy(tokstr, cmp->code + t.begin, t.length); tokstr[t.length] = 0; - printf("[l:%2u, c:%2u] %s '%s'\n", + printf("[l:%3u, c:%3u] %s '%s'\n", pos.line, pos.column, TokenType_toString(t.type), tokstr); free(tokstr); diff --git a/src/compiler/token.c b/src/compiler/token.c index bc4f836..29ffecd 100644 --- a/src/compiler/token.c +++ b/src/compiler/token.c @@ -1,15 +1,19 @@ #include "token.h" -static cstr TokenType_str[] = { +static cstr _TokenType_str[] = { "Unset", "SingleLineComment", "MultiLineComment", "Label", "Instruction", "Argument", - "Data", + "Number", + "Char", + "String", }; cstr TokenType_toString(TokenType t){ - return TokenType_str[t]; + if(t >= sizeof(_TokenType_str) / sizeof(cstr)) + return "!!INDEX_ERROR!!"; + return _TokenType_str[t]; } diff --git a/src/compiler/token.h b/src/compiler/token.h index 7ee5dde..8e3d744 100644 --- a/src/compiler/token.h +++ b/src/compiler/token.h @@ -8,16 +8,17 @@ typedef enum TokenType { TokenType_Label, TokenType_Instruction, TokenType_Argument, - TokenType_Data, - /* there is a place for 2 values left (TokenType must occupy 4 bits) */ + TokenType_Number, + TokenType_Char, + TokenType_String, } TokenType; cstr TokenType_toString(TokenType t); typedef struct Token { u32 begin; // some index in Compiler->code - u32 length : 28; // length in characters (28 bits) - TokenType type : 4; // type of token (4 bits) + u32 length : 24; // length in characters (24 bits) + TokenType type : 8; // type of token (8 bits) } Token; #define Token_construct(TYPE, BEGIN, END) ((Token){ .type = TYPE, .begin = BEGIN, .length = END })