#include "Compiler_internal.h"

// Closes the bookkeeping of the current line and records an error on `cmp`.
// Relies on a variable named `cmp` being in scope at the call site.
// FORMAT may itself be a macro expanding to a format string plus its arguments
// (see Error_unexpectedCharacter), which is why it is not parenthesized.
#define setError(FORMAT, ...) do {\
    completeLine(cmp);\
    Compiler_setError(cmp, FORMAT, ##__VA_ARGS__);\
} while(0)

#define Error_unexpectedCharacter(C) "unexpected character '%c'", C
#define Error_endOfFile "unexpected end of file"

// Stores the current column count as the length of the finished line
// and resets the column counter for the next line.
static void completeLine(Compiler* cmp){
    List_u32_push(&cmp->line_lengths, cmp->column);
    cmp->column = 0;
}

// Reads a `//` comment up to (not including) the line break or the end of input
// and pushes it as a TokenType_SingleLineComment token.
// On entry cmp->pos points at the second '/'.
static void readCommentSingleLine(Compiler* cmp){
    // the token starts one character back, at the first '/'
    Token tok = Token_construct(TokenType_SingleLineComment, cmp->pos - 1, 0);
    cmp->column++;
    cmp->pos++;

    while(cmp->pos < cmp->code.len){
        char c = cmp->code.data[cmp->pos];
        // stop ON the line break; Compiler_lex() finishes the line
        if(c == '\r' || c == '\n')
            break;
        cmp->column++;
        cmp->pos++;
    }

    // reached either a line break or the end of input
    tok.length = cmp->pos - tok.begin;
    List_Token_push(&cmp->tokens, tok);
}

// Reads a `/* ... */` comment and pushes it as a TokenType_MultiLineComment token.
// On entry cmp->pos points at the opening '*'.
// On success cmp->pos is left on the closing '/'.
// Sets an error if the input ends before the comment is closed.
static void readCommentMultiLine(Compiler* cmp){
    // the token starts one character back, at the '/'
    Token tok = Token_construct(TokenType_MultiLineComment, cmp->pos - 1, 0);
    cmp->column++;
    cmp->pos++;

    while(cmp->pos < cmp->code.len){
        char c = cmp->code.data[cmp->pos];

        // The '*' that opened the comment must not double as the closing one,
        // so the earliest valid terminator is the '/' of `/**/`, at begin + 3.
        if(c == '/' && cmp->pos >= tok.begin + 3 && cmp->code.data[cmp->pos - 1] == '*'){
            tok.length = cmp->pos - tok.begin + 1;
            List_Token_push(&cmp->tokens, tok);
            return;
        }

        // line breaks inside the comment still have to be counted
        if(c == '\n')
            completeLine(cmp);
        cmp->column++;
        cmp->pos++;
    }

    // end of input without closing `*/`
    setError(Error_endOfFile);
}

// Dispatches to the single-line or multi-line comment reader.
// On entry cmp->pos points at '/'.
// Sets an error if '/' is the last character of the input or is not followed by '/' or '*'.
static void readComment(Compiler* cmp){
    if(cmp->pos + 1 >= cmp->code.len){
        setError(Error_endOfFile);
        return;
    }

    char next = cmp->code.data[cmp->pos + 1];
    if(next == '\r' || next == '\n'){
        // report the dangling '/' itself instead of printing a line break;
        // cmp->pos is not moved, so this is safe even when '/' is the first byte of the input
        setError(Error_unexpectedCharacter(cmp->code.data[cmp->pos]));
        return;
    }

    cmp->pos++;
    cmp->column++;
    if(next == '/')
        readCommentSingleLine(cmp);
    else if(next == '*')
        readCommentMultiLine(cmp);
    else setError(Error_unexpectedCharacter(next));
}

// Reads a label name and pushes it as a TokenType_Label token (without the leading '.').
// On entry cmp->pos points at '.'.
// The name may contain letters, digits, '_' and '.', and ends at ':', a line break
// or the end of input. An empty name or any other character is an error.
static void readLabel(Compiler* cmp){
    // skip the '.'
    cmp->pos++;
    cmp->column++;
    Token tok = Token_construct(TokenType_Label, cmp->pos, 0);

    while(cmp->pos < cmp->code.len){
        char c = cmp->code.data[cmp->pos];

        // end of the label; the terminator itself is consumed by Compiler_lex()
        if(c == ':' || c == '\r' || c == '\n'){
            tok.length = cmp->pos - tok.begin;
            if(tok.length > 0){
                List_Token_push(&cmp->tokens, tok);
            }
            else {
                // empty name: step back onto the '.' and report it
                cmp->pos--;
                setError(Error_unexpectedCharacter(cmp->code.data[cmp->pos]));
            }
            return;
        }

        bool is_name_char = isAlphabeticalLower(c) || isAlphabeticalUpper(c)
            || isDigit(c) || c == '_' || c == '.';
        if(!is_name_char){
            setError(Error_unexpectedCharacter(c));
            return;
        }

        cmp->column++;
        cmp->pos++;
    }

    // end of input
    tok.length = cmp->pos - tok.begin;
    if(tok.length > 0)
        List_Token_push(&cmp->tokens, tok);
    else setError(Error_endOfFile);
}

// Reads the whitespace-separated arguments of an instruction and pushes one token per argument.
// On entry cmp->pos points at the space or tab that follows the instruction name.
// The token type is chosen from the first character of each argument:
//   '\'' -> Char, '"' -> String, '@' -> NamedDataPointer, '#' -> NamedDataSize,
//   digit -> Number, anything else -> Name.
// Stops ON a line break or ';' (outside of a quoted value) or at the end of input.
// Sets an error if a quoted value is not closed before the line or the input ends.
static void readArguments(Compiler* cmp){
    Token tok = Token_construct(TokenType_Unset, cmp->pos, 0);
    char quot = '\0';      // quotation character of the value being read, '\0' outside of quotes
    bool escaped = false;  // true if the previous character inside quotes was an unescaped '\\'

    while(cmp->pos < cmp->code.len){
        char c = cmp->code.data[cmp->pos];

        // inside a quoted value
        if(quot != '\0'){
            if(c == '\r' || c == '\n'){
                setError("line end reached but string hasn't been closed yet");
                return;
            }
            // Track escapes explicitly: looking only at the previous byte would
            // treat the closing quote of "a\\" as escaped.
            if(escaped)
                escaped = false;
            else if(c == '\\')
                escaped = true;
            else if(c == quot)
                quot = '\0';
        }
        // end of operation; the terminator itself is consumed by the callers
        else if(c == '\r' || c == '\n' || c == ';'){
            tok.length = cmp->pos - tok.begin;
            if(tok.length > 0)
                List_Token_push(&cmp->tokens, tok);
            return;
        }
        // separator: finish the current argument and start a new one after it
        else if(c == ' ' || c == '\t'){
            tok.length = cmp->pos - tok.begin;
            if(tok.length > 0)
                List_Token_push(&cmp->tokens, tok);
            tok = Token_construct(TokenType_Unset, cmp->pos + 1, 0);
        }
        // first character of an argument decides its type
        else if(tok.type == TokenType_Unset){
            if(c == '\''){
                tok.type = TokenType_Char;
                quot = c;
            }
            else if(c == '"'){
                tok.type = TokenType_String;
                quot = c;
            }
            else if(c == '@')
                tok.type = TokenType_NamedDataPointer;
            else if(c == '#')
                tok.type = TokenType_NamedDataSize;
            else if(isDigit(c))
                tok.type = TokenType_Number;
            else tok.type = TokenType_Name;
        }

        cmp->column++;
        cmp->pos++;
    }

    // end of input inside a quoted value is as much an error as a line break inside one
    if(quot != '\0'){
        setError(Error_endOfFile);
        return;
    }

    // end of input
    tok.length = cmp->pos - tok.begin;
    if(tok.length > 0)
        List_Token_push(&cmp->tokens, tok);
}

// Reads an instruction name (letters and digits), then its arguments if any,
// and pushes a TokenType_Instruction token, the argument tokens and a closing
// TokenType_OperationEnd token.
// On entry cmp->pos points at the first letter of the name.
// Sets an error on any character that can neither continue nor terminate the name.
static void readInstruction(Compiler* cmp){
    Token tok = Token_construct(TokenType_Instruction, cmp->pos, 0);
    cmp->pos++;
    cmp->column++;

    bool has_arguments = false;
    while(cmp->pos < cmp->code.len){
        char c = cmp->code.data[cmp->pos];

        // end of operation without arguments
        if(c == '\r' || c == '\n' || c == ';')
            break;
        // arguments begin
        if(c == ' ' || c == '\t'){
            has_arguments = true;
            break;
        }
        if(!isAlphabeticalLower(c) && !isAlphabeticalUpper(c) && !isDigit(c)){
            setError(Error_unexpectedCharacter(c));
            return;
        }

        cmp->column++;
        cmp->pos++;
    }

    // name ended by a terminator, a separator or the end of input
    tok.length = cmp->pos - tok.begin;
    List_Token_push(&cmp->tokens, tok);

    if(has_arguments)
        readArguments(cmp);

    // OperationEnd is placed where reading stopped
    tok = Token_construct(TokenType_OperationEnd, cmp->pos, 1);
    List_Token_push(&cmp->tokens, tok);
}

// Splits cmp->code into tokens (comments, labels, instructions with arguments)
// appended to cmp->tokens, and records the length of every line in cmp->line_lengths.
// Expects cmp->state to be CompilerState_Initial (checked by returnErrorIf_auto).
// Returns true on success and false if one of the readers put the compiler into
// CompilerState_Error.
bool Compiler_lex(Compiler* cmp){
    returnErrorIf_auto(cmp->state != CompilerState_Initial);
    cmp->state = CompilerState_Lexing;
    cmp->column = 1;

    while(cmp->pos < cmp->code.len){
        char c = cmp->code.data[cmp->pos];
        switch(c){
            // skip blank characters
            case ' ': case '\t':
            case '\r': case '\n':
                break;
            // try read comment
            case '/':
                readComment(cmp);
                break;
            // try read label
            case '.':
                readLabel(cmp);
                break;
            default:
                // try read instruction
                if(isAlphabeticalLower(c) || isAlphabeticalUpper(c))
                    readInstruction(cmp);
                else returnError(Error_unexpectedCharacter(c));
                break;
        }

        if(cmp->state == CompilerState_Error)
            return false;

        // A reader may have stopped exactly at the end of the input,
        // so cmp->pos has to be checked before the character under it is read.
        if(cmp->pos < cmp->code.len && cmp->code.data[cmp->pos] == '\n')
            completeLine(cmp);
        cmp->column++;
        cmp->pos++;
    }

    // the last line has no trailing '\n' to finish it
    completeLine(cmp);
    return true;
}