270 lines
7.5 KiB
C
270 lines
7.5 KiB
C
#include "Compiler_internal.h"
|
|
|
|
// Reports a lexing error: first closes the current line with completeLine(),
// then forwards FORMAT and any extra arguments to Compiler_setError().
// Requires a `Compiler* cmp` variable in the calling scope.
// `##__VA_ARGS__` is the GNU extension that drops the trailing comma when no
// extra arguments are given.
// Wrapped in do { } while(0) so the expansion is exactly one statement and
// stays correct inside unbraced if/else bodies.
#define setError(FORMAT, ...) do {\
    completeLine(cmp);\
    Compiler_setError(cmp, FORMAT, ##__VA_ARGS__);\
} while(0)
|
|
|
|
// Expands to a format string with a '%c' placeholder followed by the offending
// character C; meant to be passed as the argument list of setError().
#define Error_unexpectedCharacter(C) "unexpected character '%c'", C
|
|
// Error message used when the input ends in the middle of a construct.
#define Error_endOfFile "unexpected end of file"
|
|
|
|
// Finishes the current line: stores the column counter reached so far in
// cmp->line_lengths and restarts the column counter at 0.
static void completeLine(Compiler* cmp)
{
    // the push must happen before the reset, it records the old column value
    List_u32_push(&cmp->line_lengths, cmp->column);
    cmp->column = 0;
}
|
|
|
|
// Reads a "//" comment.
// On entry cmp->pos is on the second '/', so the token begins one character
// earlier. On return cmp->pos is on the terminating '\r' / '\n' or equals
// cmp->code.len; the line break itself is handled by the caller.
// Always pushes exactly one TokenType_SingleLineComment token.
static void readCommentSingleLine(Compiler* cmp)
{
    Token comment = Token_construct(TokenType_SingleLineComment, cmp->pos - 1, 0);

    // step over the second '/'
    cmp->pos++;
    cmp->column++;

    // advance until a line terminator or the end of the input
    for(; cmp->pos < cmp->code.len; cmp->pos++, cmp->column++){
        char ch = cmp->code.data[cmp->pos];
        if(ch == '\n' || ch == '\r')
            break;
    }

    // the comment covers everything up to (not including) the stop position
    comment.length = cmp->pos - comment.begin;
    List_Token_push(&cmp->tokens, comment);
}
|
|
|
|
// Reads a "/* ... */" comment, which may span several lines.
// On entry cmp->pos is on the '*' of the opening "/*", so the token begins one
// character earlier. On success one TokenType_MultiLineComment token covering
// the whole comment (including both delimiters) is pushed and cmp->pos is left
// on the closing '/'. If the input ends before "*/" is found,
// Error_endOfFile is reported through setError().
static void readCommentMultiLine(Compiler* cmp){
    Token tok = Token_construct(TokenType_MultiLineComment, cmp->pos - 1, 0);
    // step over the opening '*'
    cmp->column++;
    cmp->pos++;

    while(cmp->pos < cmp->code.len){
        char c = cmp->code.data[cmp->pos];

        // Closing "*/". The '*' must not be the one that belongs to the opening
        // "/*" (index tok.begin + 1), hence the closing '/' can sit at
        // tok.begin + 3 at the earliest. This accepts the empty comment "/**/"
        // while still rejecting "/*/".
        if(c == '/' && cmp->pos > tok.begin + 2 && cmp->code.data[cmp->pos - 1] == '*'){
            tok.length = cmp->pos - tok.begin + 1;
            List_Token_push(&cmp->tokens, tok);
            return;
        }

        // line breaks inside the comment still have to be recorded
        if(c == '\n')
            completeLine(cmp);
        cmp->column++;
        cmp->pos++;
    }

    // end of file before the comment was closed
    setError(Error_endOfFile);
}
|
|
|
|
// Dispatches on the character following a '/'.
// On entry cmp->pos is on the '/'. "//" is handed to readCommentSingleLine(),
// "/*" to readCommentMultiLine(); anything else is reported through setError().
static void readComment(Compiler* cmp){
    // a lone '/' as the last character of the input
    if(cmp->pos + 1 >= cmp->code.len){
        setError(Error_endOfFile);
        return;
    }

    char next = cmp->code.data[cmp->pos + 1];
    if(next == '\r' || next == '\n'){
        // The offending character is the '/' itself. cmp->pos is not moved
        // backwards here: the '/' may be at index 0, where a decrement would
        // wrap around and index outside of the buffer.
        setError(Error_unexpectedCharacter(cmp->code.data[cmp->pos]));
        return;
    }

    // move onto the second character of the comment opener
    cmp->pos++;
    cmp->column++;
    if(next == '/'){
        readCommentSingleLine(cmp);
    }
    else if(next == '*'){
        readCommentMultiLine(cmp);
    }
    else {
        setError(Error_unexpectedCharacter(next));
    }
}
|
|
|
|
// Reads a label name that follows a '.'.
// On entry cmp->pos is on the '.'; the token starts right after it.
// The name ends at ':', '\r', '\n' or the end of the input and may contain
// letters, digits, '_' and '.'. An empty name or any other character is
// reported through setError(). On success one TokenType_Label token is pushed
// and cmp->pos is left on the terminator (line breaks are handled by the
// caller).
static void readLabel(Compiler* cmp)
{
    // step over the leading '.'
    cmp->column++;
    cmp->pos++;
    Token label = Token_construct(TokenType_Label, cmp->pos, 0);

    while(cmp->pos < cmp->code.len){
        char ch = cmp->code.data[cmp->pos];

        // end of the label name
        if(ch == ':' || ch == '\r' || ch == '\n'){
            label.length = cmp->pos - label.begin;
            if(label.length > 0){
                List_Token_push(&cmp->tokens, label);
            }
            else {
                // empty name: step back so the '.' is the reported character
                setError(Error_unexpectedCharacter(cmp->code.data[--cmp->pos]));
            }
            return;
        }

        // only letters, digits, '_' and '.' may appear in a label name
        if(!(isAlphabeticalLower(ch) || isAlphabeticalUpper(ch) || isDigit(ch) ||
             ch == '_' || ch == '.')){
            setError(Error_unexpectedCharacter(ch));
            return;
        }

        cmp->column++;
        cmp->pos++;
    }

    // the input ended while the name was being read
    label.length = cmp->pos - label.begin;
    if(label.length > 0){
        List_Token_push(&cmp->tokens, label);
    }
    else {
        setError(Error_endOfFile);
    }
}
|
|
|
|
// Reads the arguments of an instruction.
// On entry cmp->pos is on the blank that follows the instruction name.
// Arguments are separated by ' ' or '\t'; the list ends at '\r', '\n', ';' or
// the end of the input. One token is pushed per non-empty argument; its type is
// decided by the first character:
//   '\''  -> TokenType_Char     '"' -> TokenType_String
//   '@'   -> TokenType_NamedDataPointer
//   '#'   -> TokenType_NamedDataSize
//   digit -> TokenType_Number   anything else -> TokenType_Name
// Inside a quoted value blanks and ';' do not end the argument, and a backslash
// escapes the character after it. A quoted value that is still open at a line
// end or at the end of the input is reported through setError().
// On return cmp->pos is on the terminator (line breaks are handled by the
// caller) or equals cmp->code.len.
static void readArguments(Compiler* cmp){
    Token tok = Token_construct(TokenType_Unset, cmp->pos, 0);
    char quot = '\0';     // active quotation character, '\0' outside of quotes
    bool escaped = false; // previous character inside quotes was an unescaped '\\'

    while(cmp->pos < cmp->code.len){
        char c = cmp->code.data[cmp->pos];

        // inside a quoted value
        if(quot != '\0'){
            // a line end always terminates the scan, escaped or not
            if(c == '\r' || c == '\n'){
                setError("line end reached but string hasn't been closed yet");
                return;
            }
            // Track the escape state explicitly instead of peeking at the
            // previous character, so that an escaped backslash right before
            // the closing quote ("a\\") still closes the value.
            if(escaped)
                escaped = false;
            else if(c == '\\')
                escaped = true;
            else if(c == quot)
                quot = '\0';
        }

        // end of operation
        else if(c == '\r' || c == '\n' || c == ';'){
            tok.length = cmp->pos - tok.begin;
            if(tok.length > 0)
                List_Token_push(&cmp->tokens, tok);
            return;
        }

        // a blank finishes the current argument; the next one may start after it
        else if(c == ' ' || c == '\t'){
            tok.length = cmp->pos - tok.begin;
            if(tok.length > 0)
                List_Token_push(&cmp->tokens, tok);
            tok = Token_construct(TokenType_Unset, cmp->pos + 1, 0);
        }

        // first character of an argument decides its token type
        else if(tok.type == TokenType_Unset){
            if(c == '\''){
                tok.type = TokenType_Char;
                quot = c;
            }
            else if(c == '"'){
                tok.type = TokenType_String;
                quot = c;
            }
            else if(c == '@')
                tok.type = TokenType_NamedDataPointer;
            else if(c == '#')
                tok.type = TokenType_NamedDataSize;
            else if(isDigit(c))
                tok.type = TokenType_Number;
            else tok.type = TokenType_Name;
        }

        cmp->column++;
        cmp->pos++;
    }

    // end of file inside a quoted value: same kind of mistake as an unclosed
    // value at a line end, so it is reported instead of silently accepted
    if(quot != '\0'){
        setError(Error_endOfFile);
        return;
    }

    // end of file: flush the argument that was being read
    tok.length = cmp->pos - tok.begin;
    if(tok.length > 0)
        List_Token_push(&cmp->tokens, tok);
}
|
|
|
|
// Reads one operation: an instruction name, optionally followed by arguments.
// On entry cmp->pos is on the first character of the name. The name consists
// of letters and digits and ends at a blank, '\r', '\n', ';' or the end of the
// input; any other character is reported through setError() and nothing is
// pushed. Otherwise a TokenType_Instruction token is pushed, the arguments are
// read with readArguments() when the name was followed by ' ' or '\t', and a
// one-character TokenType_OperationEnd token is pushed at the final position.
static void readInstruction(Compiler* cmp)
{
    Token name = Token_construct(TokenType_Instruction, cmp->pos, 0);
    int hasArguments = 0;

    // the first character was already checked by the caller
    cmp->column++;
    cmp->pos++;

    while(cmp->pos < cmp->code.len){
        char ch = cmp->code.data[cmp->pos];

        // the operation ends right after the name
        if(ch == '\r' || ch == '\n' || ch == ';')
            break;

        // a blank separates the name from its arguments
        if(ch == ' ' || ch == '\t'){
            hasArguments = 1;
            break;
        }

        if(!(isAlphabeticalLower(ch) || isAlphabeticalUpper(ch) || isDigit(ch))){
            setError(Error_unexpectedCharacter(ch));
            return;
        }

        cmp->pos++;
        cmp->column++;
    }

    // the name ends at the stop position (terminator, blank or end of input)
    name.length = cmp->pos - name.begin;
    List_Token_push(&cmp->tokens, name);

    if(hasArguments)
        readArguments(cmp);

    // the end marker is placed wherever reading stopped
    Token end = Token_construct(TokenType_OperationEnd, cmp->pos, 1);
    List_Token_push(&cmp->tokens, end);
}
|
|
|
|
// Splits cmp->code into tokens that are appended to cmp->tokens and records
// the length of every line in cmp->line_lengths.
// Expects cmp->state == CompilerState_Initial (checked with
// returnErrorIf_auto) and switches it to CompilerState_Lexing.
// Returns true when the whole input was consumed, false as soon as one of the
// readers leaves cmp->state at CompilerState_Error.
// NOTE(review): the exact behavior of returnErrorIf_auto / returnError is
// defined outside this file — presumably they report the error and return false.
bool Compiler_lex(Compiler* cmp){
    returnErrorIf_auto(cmp->state != CompilerState_Initial);
    cmp->state = CompilerState_Lexing;
    cmp->column = 1;

    while(cmp->pos < cmp->code.len){
        char c = cmp->code.data[cmp->pos];
        switch(c){
            // skip blank characters
            case ' ': case '\t': case '\r': case '\n':
                break;
            // try read comment
            case '/':
                readComment(cmp);
                break;
            // try read label
            case '.':
                readLabel(cmp);
                break;
            default:
                // try read instruction
                if(isAlphabeticalLower(c) || isAlphabeticalUpper(c))
                    readInstruction(cmp);
                else returnError(Error_unexpectedCharacter(c));
                break;
        }

        if(cmp->state == CompilerState_Error)
            return false;

        // The readers leave cmp->pos on the character that stopped them, which
        // may be a line break that still has to be recorded. They may also stop
        // at the end of the input, so the position is checked before the
        // buffer is read again.
        if(cmp->pos < cmp->code.len && cmp->code.data[cmp->pos] == '\n')
            completeLine(cmp);
        cmp->column++;
        cmp->pos++;
    }

    // record the length of the last line
    completeLine(cmp);
    return true;
}
|