argument parsing

This commit is contained in:
Timerix 2025-01-20 22:42:32 +05:00
parent dbe8569a3b
commit facacc90f8
12 changed files with 153 additions and 324 deletions

View File

@ -49,7 +49,7 @@ i32 VM_boot(VM* vm){
while (vm->current_pos < vm->data_size){ while (vm->current_pos < vm->data_size){
u8 opcode = vm->data[vm->current_pos]; u8 opcode = vm->data[vm->current_pos];
const Instruction* instr = Instruction_getFromOpcode(opcode); const Instruction* instr = Instruction_getByOpcode(opcode);
// printfe("[at 0x%x] %02X %s\n", (u32)vm->current_pos, opcode, instr->name); // printfe("[at 0x%x] %02X %s\n", (u32)vm->current_pos, opcode, instr->name);
if(instr == NULL){ if(instr == NULL){
VM_setError(vm, "unknown opcode %02X", opcode); VM_setError(vm, "unknown opcode %02X", opcode);

5
src/compiler/AST.c Normal file
View File

@ -0,0 +1,5 @@
#include "AST.h"
// One List_define per List_declare found in AST.h (Argument, Operation, DataDefinition).
// NOTE(review): List_define presumably emits the function bodies of the typed list
// declared by List_declare - confirm in collections/List.h.
List_define(Argument);
List_define(Operation);
List_define(DataDefinition);

View File

@ -7,27 +7,37 @@ typedef enum ArgumentType {
ArgumentType_NoArgument, ArgumentType_NoArgument,
ArgumentType_Register, ArgumentType_Register,
ArgumentType_ConstValue, ArgumentType_ConstValue,
ArgumentType_Name, ArgumentType_DataName,
ArgumentType_NamedDataPointer, ArgumentType_NamedDataPointer,
ArgumentType_NamedDataSize, ArgumentType_NamedDataSize,
}; } ArgumentType;
typedef struct Argument { typedef struct Argument {
ArgumentType type; ArgumentType type;
u32 value; u32 value;
} Argument; } Argument;
// Declares the List_Argument type used by Operation below.
List_declare(Argument);
/// @brief One operation of the AST: an opcode together with its list of arguments.
typedef struct Operation {
Opcode op; // which instruction this operation is
List_Argument args; // arguments of the instruction, each an Argument (type + value)
} Operation;
// Declares the List_Operation type used by struct AST.
List_declare(Operation);
typedef struct DataDefinition { typedef struct DataDefinition {
u8 element_size; u8 element_size;
u16 size; u16 count;
void* data;
cstr name; cstr name;
}; void* data;
} DataDefinition;
List_declare(DataDefinition); List_declare(DataDefinition);
typedef struct AST { typedef struct AST {
DataDefinition List_DataDefinition data;
List_Operation operations;
} AST; } AST;
/* /*

View File

@ -1,78 +1,19 @@
#include "Lexer.h" #include "Compiler_internal.h"
List_define(Token); #define setError(FORMAT, ...) {\
completeLine(cmp);\
CodePos Lexer_getLineAndColumn(Lexer* cmp, u32 pos){ Compiler_setError(cmp, FORMAT, ##__VA_ARGS__);\
u32 prev_lines_len = 0;
if(pos >= cmp->code_len)
return CodePos_create(0, 0);
for(u32 i = 0; i < cmp->line_lengths.len; i++){
u32 line_len = cmp->line_lengths.data[i];
if(prev_lines_len + line_len > pos)
return CodePos_create(i + 1, pos + 1 - prev_lines_len);
prev_lines_len += line_len;
}
return CodePos_create(0, 0);
} }
/// @brief Close the current source line: record its length and restart column counting.
static void completeLine(Lexer* cmp){
    // snapshot the column counter, it is the length of the line that just ended
    const u32 finished_line_len = cmp->column;
    List_u32_push(&cmp->line_lengths, finished_line_len);
    cmp->column = 0;
}
/// @brief Put the lexer into the error state and store a formatted error message.
/// The stored text is "[at LINE:COLUMN][CONTEXT] " followed by the printf-style formatted message.
/// @param cmp lexer to update; cmp->error_message receives a heap-allocated string,
///            or NULL if even the fallback allocation failed
/// @param context text shown in the second bracket pair (the Lexer_setError macro passes __func__)
/// @param format printf-style format string consumed together with the variadic arguments
void _Lexer_setError(Lexer* cmp, cstr context, cstr format, ...){
    completeLine(cmp);
    // happens at the end of file: clamp to the last valid index.
    // With an empty buffer there is no valid index, so stay at 0 instead of wrapping around.
    if(cmp->pos >= cmp->code_len)
        cmp->pos = cmp->code_len > 0 ? cmp->code_len - 1 : 0;

    char position_str[32];
    CodePos code_pos = Lexer_getLineAndColumn(cmp, cmp->pos);
    snprintf(position_str, sizeof(position_str), "[at %u:%u][", code_pos.line, code_pos.column);

    // the position and context are spliced into the format string itself
    char* NULLABLE(buf) = NULL;
    char* real_format = strcat_malloc(position_str, context, "] ", format);
    if(real_format != NULL){
        va_list argv;
        va_start(argv, format);
        buf = vsprintf_malloc(512, real_format, argv);
        va_end(argv);
        free(real_format);
    }

    // formatting failed: fall back to a fixed heap-allocated message
    if(buf == NULL){
        buf = malloc(sizeof("SPRINTF FAILED"));
        if(buf != NULL)
            strcpy(buf, "SPRINTF FAILED");
    }

    cmp->state = LexerState_Error;
    cmp->error_message = buf;
}
// Error helpers for this file. All of them expect a variable named `cmp` in scope.
// setError: record a formatted error on cmp via Lexer_setError.
#define setError(FORMAT, ...) Lexer_setError(cmp, FORMAT, ##__VA_ARGS__)
// returnError: record the error, then `return false` from the calling function.
// Expands to a bare brace block (not do{}while(0)), so do not place it between an `if` and its `else`.
#define returnError(FORMAT, ...) {\
setError(FORMAT, ##__VA_ARGS__);\
return false;\
}
// returnErrorIf: returnError only when STATEMENT is true. Expands to a bare `if`, so a following `else` would bind to it.
#define returnErrorIf(STATEMENT, FORMAT, ...) if(STATEMENT) returnError(FORMAT, ##__VA_ARGS__)
// returnErrorIf_auto: like returnErrorIf, with the stringified condition itself used as the error message.
#define returnErrorIf_auto(STATEMENT) returnErrorIf(STATEMENT, #STATEMENT)
#define Error_unexpectedCharacter(C) "unexpected character '%c'", C #define Error_unexpectedCharacter(C) "unexpected character '%c'", C
#define Error_endOfFile "unexpected end of file" #define Error_endOfFile "unexpected end of file"
void Lexer_init(Lexer* cmp){ static void completeLine(Compiler* cmp){
memset(cmp, 0, sizeof(Lexer)); List_u32_push(&cmp->line_lengths, cmp->column);
cmp->state = LexerState_Initial; cmp->column = 0;
cmp->tokens = List_Token_alloc(4096);
cmp->line_lengths = List_u32_alloc(1024);
} }
void Lexer_free(Lexer* cmp){ static void readCommentSingleLine(Compiler* cmp){
free(cmp->code);
free(cmp->tokens.data);
free(cmp->line_lengths.data);
}
static void readCommentSingleLine(Lexer* cmp){
char c; // '/' char c; // '/'
Token tok = Token_construct(TokenType_SingleLineComment, cmp->pos - 1, 0); Token tok = Token_construct(TokenType_SingleLineComment, cmp->pos - 1, 0);
cmp->column++; cmp->column++;
@ -97,7 +38,7 @@ static void readCommentSingleLine(Lexer* cmp){
List_Token_push(&cmp->tokens, tok); List_Token_push(&cmp->tokens, tok);
} }
static void readCommentMultiLine(Lexer* cmp){ static void readCommentMultiLine(Compiler* cmp){
char c; // '*' char c; // '*'
Token tok = Token_construct(TokenType_MultiLineComment, cmp->pos - 1, 0); Token tok = Token_construct(TokenType_MultiLineComment, cmp->pos - 1, 0);
cmp->column++; cmp->column++;
@ -122,7 +63,7 @@ static void readCommentMultiLine(Lexer* cmp){
setError(Error_endOfFile); setError(Error_endOfFile);
} }
static void readComment(Lexer* cmp){ static void readComment(Compiler* cmp){
char c; // '/' char c; // '/'
if(cmp->pos + 1 == cmp->code_len){ if(cmp->pos + 1 == cmp->code_len){
setError(Error_endOfFile); setError(Error_endOfFile);
@ -144,7 +85,7 @@ static void readComment(Lexer* cmp){
else setError(Error_unexpectedCharacter(c)); else setError(Error_unexpectedCharacter(c));
} }
static void readLabel(Lexer* cmp){ static void readLabel(Compiler* cmp){
char c; // '.' char c; // '.'
cmp->pos++; cmp->pos++;
cmp->column++; cmp->column++;
@ -179,16 +120,27 @@ static void readLabel(Lexer* cmp){
else setError(Error_endOfFile); else setError(Error_endOfFile);
} }
static void readArguments(Lexer* cmp){ static void readArguments(Compiler* cmp){
char c; // space char c; // space
cmp->pos++; Token tok = Token_construct(TokenType_Unset, cmp->pos, 0);
cmp->column++; char quot = '\0'; // quotation character of a string value
Token tok = Token_construct(TokenType_Argument, cmp->pos, 0);
while(cmp->pos < cmp->code_len){ while(cmp->pos < cmp->code_len){
c = cmp->code[cmp->pos]; c = cmp->code[cmp->pos];
// string argument reading
if(quot != '\0'){
if(c == quot && cmp->code[cmp->pos - 1] != '\\'){
quot = '\0';
}
else if(c == '\r' || c == '\n'){
setError("line end reached but string hasn't been closed yet");
return;
}
}
// end of line // end of line
if(c == '\r' || c == '\n' || c == ';'){ else if(c == '\r' || c == '\n' || c == ';'){
tok.length = cmp->pos - tok.begin; tok.length = cmp->pos - tok.begin;
if(tok.length > 0) if(tok.length > 0)
List_Token_push(&cmp->tokens, tok); List_Token_push(&cmp->tokens, tok);
@ -197,13 +149,30 @@ static void readArguments(Lexer* cmp){
} }
// new argument begins // new argument begins
if(c == ' ' || c == '\t'){ else if(c == ' ' || c == '\t'){
tok.length = cmp->pos - tok.begin; tok.length = cmp->pos - tok.begin;
if(tok.length > 0) if(tok.length > 0)
List_Token_push(&cmp->tokens, tok); List_Token_push(&cmp->tokens, tok);
tok.begin = cmp->pos + 1; tok = Token_construct(TokenType_Unset, cmp->pos + 1, 0);
}
else if(tok.type == TokenType_Unset){
if(c == '\''){
tok.type = TokenType_Char;
quot = c;
}
else if(c == '"'){
tok.type = TokenType_String;
quot = c;
}
else if(c == '@')
tok.type = TokenType_NamedDataPointer;
else if(c == '#')
tok.type = TokenType_NamedDataSize;
else if(isDigit(c))
tok.type = TokenType_Number;
else tok.type = TokenType_Name;
} }
cmp->column++; cmp->column++;
cmp->pos++; cmp->pos++;
@ -215,7 +184,7 @@ static void readArguments(Lexer* cmp){
List_Token_push(&cmp->tokens, tok); List_Token_push(&cmp->tokens, tok);
} }
static void readInstruction(Lexer* cmp){ static void readInstruction(Compiler* cmp){
Token tok = Token_construct(TokenType_Instruction, cmp->pos, 0); Token tok = Token_construct(TokenType_Instruction, cmp->pos, 0);
cmp->pos++; cmp->pos++;
cmp->column++; cmp->column++;
@ -252,86 +221,9 @@ static void readInstruction(Lexer* cmp){
List_Token_push(&cmp->tokens, tok); List_Token_push(&cmp->tokens, tok);
} }
static void readChar(Lexer* cmp){ bool Compiler_lex(Compiler* cmp){
Token tok = Token_construct(TokenType_Char, cmp->pos, 0); returnErrorIf_auto(cmp->state != CompilerState_Initial);
cmp->pos++; cmp->state = CompilerState_Lexing;
cmp->column++;
while(cmp->pos < cmp->code_len){
char c = cmp->code[cmp->pos];
// end of line
if(c == '\r' || c == '\n'){
setError(Error_unexpectedCharacter(cmp->code[--cmp->pos]));
return;
}
if(c == '\''){
tok.length = cmp->pos - tok.begin + 1;
List_Token_push(&cmp->tokens, tok);
return;
}
cmp->column++;
cmp->pos++;
}
// end of file
setError(Error_endOfFile);
}
/// @brief Lex a double-quoted string literal that starts at cmp->pos.
/// Pushes a TokenType_String token spanning both quotes. Sets an error if a line end
/// or the end of the file is reached before the closing quote.
static void readString(Lexer* cmp){
    Token tok = Token_construct(TokenType_String, cmp->pos, 0);
    // step over the opening quote
    cmp->pos++;
    cmp->column++;

    for(; cmp->pos < cmp->code_len; cmp->pos++, cmp->column++){
        const char ch = cmp->code[cmp->pos];
        switch(ch){
            // end of line
            case '\r':
            case '\n':
                // step back and report the character preceding the line break
                cmp->pos--;
                setError(Error_unexpectedCharacter(cmp->code[cmp->pos]));
                return;
            // closing quote, it belongs to the token
            case '"':
                tok.length = cmp->pos - tok.begin + 1;
                List_Token_push(&cmp->tokens, tok);
                return;
            default:
                break;
        }
    }

    // end of file
    setError(Error_endOfFile);
}
/// @brief Lex a number token that starts at cmp->pos.
/// The token ends at the first '\r', '\n', ' ', '\t', ',' or ';' (not included) or at the end of the file.
static void readNumber(Lexer* cmp){
    Token tok = Token_construct(TokenType_Number, cmp->pos, 0);
    // the first character was already checked by the caller
    cmp->pos++;
    cmp->column++;

    for(; cmp->pos < cmp->code_len; cmp->pos++, cmp->column++){
        const char ch = cmp->code[cmp->pos];
        if(ch == ' ' || ch == '\t' || ch == '\r' || ch == '\n' || ch == ',' || ch == ';')
            break;
    }

    // reached either a delimiter or the end of file, the token is finished in both cases
    tok.length = cmp->pos - tok.begin;
    List_Token_push(&cmp->tokens, tok);
}
static bool lex(Lexer* cmp){
returnErrorIf_auto(cmp->state != LexerState_Initial);
cmp->state = LexerState_Lexing;
cmp->column = 1; cmp->column = 1;
while(cmp->pos < cmp->code_len){ while(cmp->pos < cmp->code_len){
@ -348,23 +240,15 @@ static bool lex(Lexer* cmp){
case '.': case '.':
readLabel(cmp); readLabel(cmp);
break; break;
case '"':
readString(cmp);
break;
case '\'':
readChar(cmp);
break;
default: default:
// try read instruction // try read instruction
if(isAlphabeticalLower(c) || isAlphabeticalUpper(c)) if(isAlphabeticalLower(c) || isAlphabeticalUpper(c))
readInstruction(cmp); readInstruction(cmp);
else if(isDigit(c))
readNumber(cmp);
else returnError(Error_unexpectedCharacter(c)); else returnError(Error_unexpectedCharacter(c));
break; break;
} }
if(cmp->state == LexerState_Error) if(cmp->state == CompilerState_Error)
return false; return false;
c = cmp->code[cmp->pos]; c = cmp->code[cmp->pos];
@ -377,94 +261,3 @@ static bool lex(Lexer* cmp){
completeLine(cmp); completeLine(cmp);
return true; return true;
} }
/// @brief Parsing phase. Currently a stub: it only validates and advances the state.
/// @return false (with an error set on cmp) unless cmp->state is LexerState_Lexing, otherwise true
static bool parse(Lexer* cmp){
// the stringified condition is used as the error message
returnErrorIf_auto(cmp->state != LexerState_Lexing);
cmp->state = LexerState_Parsing;
return true;
}
/// @brief Code generation phase. Currently a stub: it only validates and advances the state.
/// @param f output file; not written to by the current body
/// @return false (with an error set on cmp) unless cmp->state is LexerState_Parsing, otherwise true
static bool compile(Lexer* cmp, FILE* f){
// the stringified condition is used as the error message
returnErrorIf_auto(cmp->state != LexerState_Parsing);
cmp->state = LexerState_Compiling;
return true;
}
/// @brief Load an assembly source file into memory, lex it, then run the parse and compile phases.
/// @param cmp initialized lexer; once the source is loaded, cmp->code owns the text
///            (released by Lexer_free), including on later failures
/// @param source_file_name path of the source file, read in binary mode
/// @param out_file_name path of the output file, opened with "wb"
/// @param debug when true, prints the source text, the line lengths and the token list to stdout
/// @return true if no errors, false if any error occurred (check cmp->error_message)
bool Lexer_compileTasm(Lexer* cmp, cstr source_file_name, cstr out_file_name, bool debug){
    FILE* f = fopen(source_file_name, "rb");
    if(f == NULL)
        returnError("ERROR: can't open file '%s'", source_file_name);

    // read the whole file into a growable byte buffer
    List_u8 buf = List_u8_alloc(64 * 1024);
    int ret;
    while((ret = fgetc(f)) != EOF) {
        List_u8_push(&buf, ret);
    }
    // fgetc returns EOF on read errors too; close the input exactly once on every path
    bool read_failed = ferror(f) != 0;
    fclose(f);
    if(read_failed){
        free(buf.data);
        returnError("can't read file '%s'", source_file_name);
    }
    if(buf.len == 0){
        free(buf.data);
        returnError("soucre file is empty");
    }

    // Append the terminator BEFORE publishing the pointer:
    // the push may reallocate buf.data, which would leave cmp->code dangling.
    cmp->code_len = buf.len; // the terminator is not counted
    List_u8_push(&buf, 0);
    cmp->code = (char*)buf.data;

    f = fopen(out_file_name, "wb");
    if(f == NULL){
        // do not free buf.data here: cmp->code owns it now and Lexer_free releases it
        returnError("ERROR: can't open file '%s'", out_file_name);
    }

    if(debug){
        printf("----------------------------------[%s]---------------------------------\n", source_file_name);
        fputs(cmp->code, stdout);
        fputc('\n', stdout);
    }

    bool success = lex(cmp);

    // the dump is printed even when lexing failed, to show how far the lexer got
    if(debug){
        printf("------------------------------------[lines]-----------------------------------\n");
        for(u32 i = 0; i < cmp->line_lengths.len; i++){
            printf("[%u] length: %u\n", i+1, cmp->line_lengths.data[i]);
        }
        printf("------------------------------------[tokens]-----------------------------------\n");
        for(u32 i = 0; i < cmp->tokens.len; i++){
            Token t = cmp->tokens.data[i];
            CodePos pos = Lexer_getLineAndColumn(cmp, t.begin);
            // print the token text straight from the source buffer, bounded by its length
            printf("[l:%3u, c:%3u] %s '%.*s'\n",
                pos.line, pos.column,
                TokenType_toString(t.type), (int)t.length, cmp->code + t.begin);
        }
    }

    // each phase runs only if the previous one succeeded
    if(success)
        success = parse(cmp);
    if(success)
        success = compile(cmp, f);

    fclose(f);
    if(success){
        cmp->state = LexerState_Success;
    }
    return success;
}

View File

@ -1,48 +1,32 @@
#pragma once #pragma once
#include "../std.h" #include "../std.h"
#include "../collections/List.h" #include "../collections/List.h"
#include "token.h" #include "Token.h"
#include "AST.h" #include "AST.h"
List_declare(Token); typedef enum CompilerState {
CompilerState_Initial,
CompilerState_Lexing,
CompilerState_Parsing,
CompilerState_Compiling,
CompilerState_Error,
CompilerState_Success
} CompilerState;
typedef enum LexerState { typedef struct Compiler {
LexerState_Initial,
LexerState_Lexing,
LexerState_Parsing,
LexerState_Compiling,
LexerState_Error,
LexerState_Success
} LexerState;
typedef struct Lexer {
char* code; char* code;
u32 code_len; u32 code_len;
u32 column; // > 0 if code parsing started u32 column; // > 0 if code parsing started
u32 pos; u32 pos;
LexerState state; CompilerState state;
NULLABLE(char* error_message); NULLABLE(char* error_message);
List_Token tokens; List_Token tokens;
List_u32 line_lengths; List_u32 line_lengths;
} Lexer; } Compiler;
void Lexer_init(Lexer* cmp); void Compiler_init(Compiler* cmp);
void Lexer_free(Lexer* cmp); void Compiler_free(Compiler* cmp);
/// @brief compile assembly language code to machine code /// @brief compile assembly language code to machine code
/// @return true if no errors, false if any error occurred (check cmp->error_message) /// @return true if no errors, false if any error occurred (check cmp->error_message)
bool Lexer_compileTasm(Lexer* cmp, cstr source_file_name, cstr out_file_name, bool debug); bool Compiler_compile(Compiler* cmp, cstr source_file_name, cstr out_file_name, bool debug);
/// Convenience wrapper around _Lexer_setError that passes the calling function's name (__func__) as context.
#define Lexer_setError(cmp, format, ...) _Lexer_setError(cmp, __func__, format ,##__VA_ARGS__)
/// @brief Switch the lexer to LexerState_Error and store a formatted message in cmp->error_message.
/// @param context text placed in the message prefix after the code position
/// @param format printf-style format string; the attribute makes the compiler check it against the variadic arguments
void _Lexer_setError(Lexer* cmp, cstr context, cstr format, ...) __attribute__((__format__(__printf__, 3, 4)));
/// @brief Position in the source text expressed as line and column.
typedef struct CodePos {
u32 line; // 0 on error
u32 column; // 0 on error
} CodePos;
/// Builds a CodePos value from a line L and a column C.
#define CodePos_create(L, C) ((CodePos){ .line = L, .column = C })
/// @brief Translate an index in the code buffer to a line and column using the recorded line lengths.
/// @param pos index in code buffer
/// @return the position, or line 0 / column 0 if pos cannot be resolved
CodePos Lexer_getLineAndColumn(Lexer* cmp, u32 pos);

View File

@ -0,0 +1,27 @@
#include "Compiler.h"
/// @brief Record a formatted error on the compiler object.
/// @param context caller-supplied context string (the Compiler_setError macro passes __func__)
/// @param format printf-style format string; the attribute makes the compiler check it against the variadic arguments
/// NOTE(review): the definition is not visible from this header - confirm how state and error_message are updated.
void _Compiler_setError(Compiler* cmp, cstr context, cstr format, ...) __attribute__((__format__(__printf__, 3, 4)));
/// Convenience wrapper that passes the calling function's name as context.
#define Compiler_setError(cmp, format, ...) _Compiler_setError(cmp, __func__, format ,##__VA_ARGS__)
// Early-return helpers for functions returning bool.
// They rely on a `setError` macro that is NOT defined in this header:
// each source file defines its own setError before using them.
// returnError: record the error, then `return false`.
// Expands to a bare brace block (not do{}while(0)), so do not place it between an `if` and its `else`.
#define returnError(FORMAT, ...) {\
setError(FORMAT, ##__VA_ARGS__);\
return false;\
}
// returnErrorIf: returnError only when STATEMENT is true. Expands to a bare `if`, so a following `else` would bind to it.
#define returnErrorIf(STATEMENT, FORMAT, ...) if(STATEMENT) returnError(FORMAT, ##__VA_ARGS__)
// returnErrorIf_auto: like returnErrorIf, with the stringified condition itself used as the error message.
#define returnErrorIf_auto(STATEMENT) returnErrorIf(STATEMENT, #STATEMENT)
/// @brief Position in the source text expressed as line and column.
typedef struct CodePos {
u32 line; // 0 on error
u32 column; // 0 on error
} CodePos;
/// Builds a CodePos value from a line L and a column C.
#define CodePos_create(L, C) ((CodePos){ .line = L, .column = C })
/// @brief Translate an index in the code buffer to a line and column.
/// @param pos index in code buffer
/// @return the position; per the field comments above, line and column are 0 on error
CodePos Compiler_getLineAndColumn(Compiler* cmp, u32 pos);
/// @brief Lexing phase. Expects cmp->state == CompilerState_Initial and moves it to CompilerState_Lexing.
/// @return false if an error was set, true otherwise
bool Compiler_lex(Compiler* cmp);
/// @brief Parsing phase. Expects cmp->state == CompilerState_Lexing and moves it to CompilerState_Parsing.
/// @return false if an error was set, true otherwise
bool Compiler_parse(Compiler* cmp);

View File

@ -1,3 +1,12 @@
#include "compiler.h" #include "Compiler_internal.h"
List_define(DataDefinition); #define setError(FORMAT, ...) {\
Compiler_setError(cmp, FORMAT, ##__VA_ARGS__);\
}
/// @brief Parsing phase. Currently a stub: it only validates and advances the state.
/// @return false (with an error set on cmp) unless cmp->state is CompilerState_Lexing, otherwise true
bool Compiler_parse(Compiler* cmp){
// the stringified condition is used as the error message
returnErrorIf_auto(cmp->state != CompilerState_Lexing);
cmp->state = CompilerState_Parsing;
return true;
}

View File

@ -1,19 +1,19 @@
#include "token.h" #include "Token.h"
List_define(Token);
static cstr _TokenType_str[] = { static cstr _TokenType_str[] = {
"Unset", "Unset",
"SingleLineComment", "SingleLineComment",
"MultiLineComment", "MultiLineComment",
"Instruction",
"Label", "Label",
"DataDefinition",
"Number", "Number",
"Char", "Char",
"String", "String",
"Instruction", "Name",
"Register", "NamedDataPointer",
"DataType", "NamedDataSize"
"DataPointer",
"DataSize"
}; };
cstr TokenType_toString(TokenType t){ cstr TokenType_toString(TokenType t){

View File

@ -1,20 +1,19 @@
#pragma once #pragma once
#include "../std.h" #include "../std.h"
#include "../collections/List.h"
typedef enum TokenType { typedef enum TokenType {
TokenType_Unset, TokenType_Unset, // initial value
TokenType_SingleLineComment, TokenType_SingleLineComment, // //comment
TokenType_MultiLineComment, TokenType_MultiLineComment, // /* comment */
TokenType_Label, TokenType_Instruction, // abc
TOkenType_DataDefinition, TokenType_Label, // .abc:
TokenType_Number, TokenType_Number, // 0123
TokenType_Char, TokenType_Char, // 'A'
TokenType_String, TokenType_String, // "aaaa"
TokenType_Instruction, TokenType_Name, // xyz
TokenType_Register, TokenType_NamedDataPointer, // @xyz
TokenType_DataType, TokenType_NamedDataSize // #xyz
TokenType_DataPointer,
TokenType_DataSize
} TokenType; } TokenType;
cstr TokenType_toString(TokenType t); cstr TokenType_toString(TokenType t);
@ -25,4 +24,6 @@ typedef struct Token {
TokenType type : 8; // type of token (8 bits) TokenType type : 8; // type of token (8 bits)
} Token; } Token;
List_declare(Token);
#define Token_construct(TYPE, BEGIN, END) ((Token){ .type = TYPE, .begin = BEGIN, .length = END }) #define Token_construct(TYPE, BEGIN, END) ((Token){ .type = TYPE, .begin = BEGIN, .length = END })

View File

@ -29,7 +29,7 @@ const Instruction instructions[] = {
// Instruction_construct(CALL), // Instruction_construct(CALL),
}; };
const Instruction* Instruction_getFromOpcode(Opcode opcode){ const Instruction* Instruction_getByOpcode(Opcode opcode){
if(opcode >= ARRAY_SIZE(instructions)) if(opcode >= ARRAY_SIZE(instructions))
return NULL; return NULL;

View File

@ -33,4 +33,4 @@ typedef struct Instruction {
/// @brief get instruction info from table /// @brief get instruction info from table
/// @param opcode any byte /// @param opcode any byte
/// @return ptr to struct or NULL /// @return ptr to struct or NULL
const Instruction* NULLABLE(Instruction_getFromOpcode)(Opcode opcode); const Instruction* NULLABLE(Instruction_getByOpcode)(Opcode opcode);

View File

@ -33,7 +33,7 @@ i32 main(const i32 argc, cstr* argv){
} }
else if(arg_is("-op") || arg_is("--opcodes")){ else if(arg_is("-op") || arg_is("--opcodes")){
for(u8 opcode = 0; opcode < 255; opcode++){ for(u8 opcode = 0; opcode < 255; opcode++){
const Instruction* instr = Instruction_getFromOpcode(opcode); const Instruction* instr = Instruction_getByOpcode(opcode);
if(instr != NULL){ if(instr != NULL){
printf("%02X %s\n", opcode, instr->name); printf("%02X %s\n", opcode, instr->name);
} }
@ -134,7 +134,7 @@ i32 bootFromImage(cstr image_file){
i32 compileSources(cstr source_file, cstr out_file){ i32 compileSources(cstr source_file, cstr out_file){
Compiler cmp; Compiler cmp;
Compiler_init(&cmp); Compiler_init(&cmp);
bool success = Compiler_compileTasm(&cmp, source_file, out_file, true); bool success = Compiler_compile(&cmp, source_file, out_file, true);
Compiler_free(&cmp); Compiler_free(&cmp);
if(!success){ if(!success){
if(cmp.error_message){ if(cmp.error_message){