TCPU/src/compiler/Lexer.c

270 lines
7.5 KiB
C

#include "Compiler_internal.h"
// Reports a lexer error: first closes the current line via completeLine(),
// then forwards FORMAT and the optional printf-style arguments to Compiler_setError().
// Expects a variable named `cmp` to be in scope at the expansion site.
// Wrapped in do { } while(0) so that `setError(...);` is exactly one statement
// and can be used safely as an un-braced if/else branch.
#define setError(FORMAT, ...) do {\
    completeLine(cmp);\
    Compiler_setError(cmp, FORMAT, ##__VA_ARGS__);\
} while(0)
// Expands to TWO comma-separated items: a format string and the offending character C.
// Intended to be passed as the argument list of an error macro such as setError().
#define Error_unexpectedCharacter(C) "unexpected character '%c'", C
// Message used when the input ends in the middle of a construct.
#define Error_endOfFile "unexpected end of file"
// Finishes the current source line: appends the current column counter to
// cmp->line_lengths and resets the counter to 0.
static void completeLine(Compiler* cmp){
List_u32_push(&cmp->line_lengths, cmp->column);
cmp->column = 0;
}
// Reads a "//" comment up to (not including) the line break or the end of input
// and stores it as a TokenType_SingleLineComment token.
// On entry cmp->pos is at the second '/', so the token begins one character earlier.
static void readCommentSingleLine(Compiler* cmp){
    Token comment = Token_construct(TokenType_SingleLineComment, cmp->pos - 1, 0);
    // step over the second '/'
    cmp->pos++;
    cmp->column++;
    // advance until a line break; the break itself is left for the caller to handle
    while(cmp->pos < cmp->code.len){
        char ch = cmp->code.data[cmp->pos];
        if(ch == '\r' || ch == '\n')
            break;
        cmp->pos++;
        cmp->column++;
    }
    // identical bookkeeping whether the scan stopped at a line break or at end of file
    comment.length = cmp->pos - comment.begin;
    List_Token_push(&cmp->tokens, comment);
}
// Reads a "/* ... */" comment and stores it as a TokenType_MultiLineComment token
// that spans from the opening '/' to the closing '/' inclusive.
// On entry cmp->pos is at the '*' of the opening "/*".
// On success cmp->pos is left on the closing '/'.
// Every '\n' inside the comment completes a line.
// Reaching the end of input before "*/" sets Error_endOfFile.
static void readCommentMultiLine(Compiler* cmp){
    Token tok = Token_construct(TokenType_MultiLineComment, cmp->pos - 1, 0);
    // step over the opening '*'
    cmp->column++;
    cmp->pos++;
    while(cmp->pos < cmp->code.len){
        char c = cmp->code.data[cmp->pos];
        // Closing "*/": the '*' in front of the '/' must not be the one that belongs
        // to the opening "/*" (index tok.begin + 1). The earliest valid closing '/'
        // is therefore at tok.begin + 3, which is the empty comment "/**/".
        if(c == '/' && cmp->pos > tok.begin + 2 && cmp->code.data[cmp->pos - 1] == '*'){
            tok.length = cmp->pos - tok.begin + 1;
            List_Token_push(&cmp->tokens, tok);
            return;
        }
        if(c == '\n')
            completeLine(cmp);
        cmp->column++;
        cmp->pos++;
    }
    // end of file reached without a closing "*/"
    setError(Error_endOfFile);
}
// Decides what follows a '/' and dispatches to the matching comment reader.
// On entry cmp->pos is at the '/'.
//   "//" -> readCommentSingleLine()
//   "/*" -> readCommentMultiLine()
// Anything else is an error:
//   '/' as the last character of the input  -> Error_endOfFile
//   '/' directly followed by a line break   -> unexpected character '/'
//   '/' followed by any other character     -> unexpected character (that character)
static void readComment(Compiler* cmp){
    if(cmp->pos + 1 >= cmp->code.len){
        setError(Error_endOfFile);
        return;
    }

    char next = cmp->code.data[cmp->pos + 1];
    if(next == '\r' || next == '\n'){
        // cmp->pos has not been advanced yet, so it still addresses the lone '/'.
        // Report that character and do not step backwards: with the '/' at index 0
        // a decrement would read in front of the buffer.
        setError(Error_unexpectedCharacter(cmp->code.data[cmp->pos]));
        return;
    }

    // move onto the second character of the comment opener
    cmp->pos++;
    cmp->column++;
    if(next == '/'){
        readCommentSingleLine(cmp);
    }
    else if(next == '*'){
        readCommentMultiLine(cmp);
    }
    else {
        setError(Error_unexpectedCharacter(next));
    }
}
// Reads a label name that follows a '.' and stores it as a TokenType_Label token
// (the leading '.' is not part of the token).
// The name may contain letters, digits, '_' and '.'; it ends at ':', a line break
// or the end of input. Any other character is reported as unexpected.
// An empty name is an error: at a terminator the character before it is reported,
// at the end of input Error_endOfFile is set.
static void readLabel(Compiler* cmp){
    // step over the leading '.'
    cmp->column++;
    cmp->pos++;
    Token label = Token_construct(TokenType_Label, cmp->pos, 0);

    for(; cmp->pos < cmp->code.len; cmp->pos++, cmp->column++){
        char ch = cmp->code.data[cmp->pos];
        // terminator: leave it for the caller to skip
        if(ch == ':' || ch == '\r' || ch == '\n')
            break;
        if(!(isAlphabeticalLower(ch) || isAlphabeticalUpper(ch) || isDigit(ch) ||
             ch == '_' || ch == '.')){
            setError(Error_unexpectedCharacter(ch));
            return;
        }
    }

    label.length = cmp->pos - label.begin;
    if(label.length > 0){
        List_Token_push(&cmp->tokens, label);
        return;
    }

    // empty label name
    if(cmp->pos < cmp->code.len){
        // stopped at a terminator: step back and report the character before it
        setError(Error_unexpectedCharacter(cmp->code.data[--cmp->pos]));
    }
    else {
        setError(Error_endOfFile);
    }
}
// Splits the rest of an operation into argument tokens.
// On entry cmp->pos is at the blank that follows the instruction name.
// Arguments are separated by ' ' or '\t'; the list ends at a line break, ';'
// or the end of input (cmp->pos is left on the terminating character).
// The first character of an argument decides its token type:
//   '\''  -> TokenType_Char             '"'   -> TokenType_String
//   '@'   -> TokenType_NamedDataPointer '#'   -> TokenType_NamedDataSize
//   digit -> TokenType_Number           other -> TokenType_Name
// Inside a quoted value blanks and ';' do not terminate anything. A backslash
// escapes the next character, so \" and \' do not close the value while a
// quote after an escaped backslash (\\) does.
// Errors: a line break or the end of input inside an unclosed quoted value.
static void readArguments(Compiler* cmp){
    Token tok = Token_construct(TokenType_Unset, cmp->pos, 0);
    char quot = '\0';       // quotation character of the value being read, '\0' outside of one
    bool escaped = false;   // true when the previous character inside the value was an unescaped '\\'

    while(cmp->pos < cmp->code.len){
        char c = cmp->code.data[cmp->pos];
        // quoted value reading
        if(quot != '\0'){
            if(c == '\r' || c == '\n'){
                setError("line end reached but string hasn't been closed yet");
                return;
            }
            if(escaped)
                escaped = false;    // this character is consumed by the preceding backslash
            else if(c == '\\')
                escaped = true;
            else if(c == quot)
                quot = '\0';
        }
        // end of operation
        else if(c == '\r' || c == '\n' || c == ';'){
            tok.length = cmp->pos - tok.begin;
            if(tok.length > 0)
                List_Token_push(&cmp->tokens, tok);
            // the terminator itself is handled by the callers
            return;
        }
        // new argument begins
        else if(c == ' ' || c == '\t'){
            tok.length = cmp->pos - tok.begin;
            if(tok.length > 0)
                List_Token_push(&cmp->tokens, tok);
            tok = Token_construct(TokenType_Unset, cmp->pos + 1, 0);
        }
        // first character of an argument selects the token type
        else if(tok.type == TokenType_Unset){
            if(c == '\''){
                tok.type = TokenType_Char;
                quot = c;
            }
            else if(c == '"'){
                tok.type = TokenType_String;
                quot = c;
            }
            else if(c == '@')
                tok.type = TokenType_NamedDataPointer;
            else if(c == '#')
                tok.type = TokenType_NamedDataSize;
            else if(isDigit(c))
                tok.type = TokenType_Number;
            else tok.type = TokenType_Name;
        }
        cmp->column++;
        cmp->pos++;
    }

    // end of file
    if(quot != '\0'){
        // the input ended inside a quoted value: do not emit a truncated token
        setError(Error_endOfFile);
        return;
    }
    tok.length = cmp->pos - tok.begin;
    if(tok.length > 0)
        List_Token_push(&cmp->tokens, tok);
}
// Reads one operation: the instruction name, its arguments (if a blank follows
// the name) and a closing TokenType_OperationEnd token.
// On entry cmp->pos is at the first character of the name.
// The rest of the name may contain letters and digits only; it ends at a blank,
// a line break, ';' or the end of input. Any other character is reported as
// unexpected and nothing is pushed.
static void readInstruction(Compiler* cmp){
    Token name = Token_construct(TokenType_Instruction, cmp->pos, 0);
    // step over the first character of the name
    cmp->pos++;
    cmp->column++;

    // find the end of the instruction name
    for(; cmp->pos < cmp->code.len; cmp->pos++, cmp->column++){
        char ch = cmp->code.data[cmp->pos];
        if(ch == '\r' || ch == '\n' || ch == ';' || ch == ' ' || ch == '\t')
            break;
        if(!(isAlphabeticalLower(ch) || isAlphabeticalUpper(ch) || isDigit(ch))){
            setError(Error_unexpectedCharacter(ch));
            return;
        }
    }

    name.length = cmp->pos - name.begin;
    List_Token_push(&cmp->tokens, name);

    // a blank after the name means arguments follow
    if(cmp->pos < cmp->code.len){
        char stop = cmp->code.data[cmp->pos];
        if(stop == ' ' || stop == '\t')
            readArguments(cmp);
    }

    // the operation always ends with an OperationEnd token at the current position
    Token end = Token_construct(TokenType_OperationEnd, cmp->pos, 1);
    List_Token_push(&cmp->tokens, end);
}
// Splits cmp->code into tokens (appended to cmp->tokens) and records the length
// of every line in cmp->line_lengths.
// Dispatch by the first character of a construct:
//   blank  -> skipped
//   '/'    -> comment
//   '.'    -> label
//   letter -> instruction with its arguments
//   other  -> "unexpected character" error
// Returns true on success and false as soon as a reader has put the compiler
// into CompilerState_Error.
// NOTE(review): returnErrorIf_auto/returnError are defined elsewhere; presumably they
// set the error state and return false - confirm.
bool Compiler_lex(Compiler* cmp){
    returnErrorIf_auto(cmp->state != CompilerState_Initial);
    cmp->state = CompilerState_Lexing;
    cmp->column = 1;

    while(cmp->pos < cmp->code.len){
        char c = cmp->code.data[cmp->pos];
        switch(c){
            // skip blank characters
            case ' ': case '\t': case '\r': case '\n':
                break;
            // try read comment
            case '/':
                readComment(cmp);
                break;
            // try read label
            case '.':
                readLabel(cmp);
                break;
            default:
                // try read instruction
                if(isAlphabeticalLower(c) || isAlphabeticalUpper(c))
                    readInstruction(cmp);
                else returnError(Error_unexpectedCharacter(c));
                break;
        }

        if(cmp->state == CompilerState_Error)
            return false;

        // A reader may have consumed the input up to its very end. There is no
        // character left to inspect or to count then, and reading data[pos]
        // would access the buffer out of bounds.
        if(cmp->pos >= cmp->code.len)
            break;

        // the readers stop ON their terminating character; handle and skip it here
        c = cmp->code.data[cmp->pos];
        if(c == '\n')
            completeLine(cmp);
        cmp->column++;
        cmp->pos++;
    }

    // the last line is not terminated by '\n'
    completeLine(cmp);
    return true;
}