ParadoxSaveParser/ParadoxSaveParser.Lib/Parser.cs

270 lines
8.1 KiB
C#

global using System;
global using System.IO;
global using System.Text;
global using System.Collections.Generic;
namespace ParadoxSaveParser.Lib;
/// <summary>
/// Lexes and parses a plain-text save whose first bytes are the <c>EU4txt</c> header
/// into nested dictionaries, lists and strings.
/// </summary>
/// <remarks>
/// Parsed values are one of: <see cref="string"/>, <c>List&lt;object&gt;</c>, or
/// <c>Dictionary&lt;string, List&lt;object&gt;&gt;</c> (a key may occur several times,
/// so every key maps to the list of all values assigned to it).
/// Bytes are widened to <see cref="char"/> one by one, so multi-byte encodings
/// such as UTF-8 are not decoded.
/// </remarks>
public class Parser
{
    /// <summary>Magic bytes expected at the very start of the stream.</summary>
    private const string ExpectedHeader = "EU4txt";

    /// <summary>Stream the save is read from; consumed sequentially by <see cref="Lex"/>.</summary>
    protected Stream _saveFile;

    // Large initial capacity so the list does not have to grow repeatedly while lexing.
    private readonly List<Token> _tokens = new(4_194_304);

    /// <summary>Index of the next token to be consumed by the Parse* methods.</summary>
    private int _tokenIndex;

    /// <summary>Creates a parser that reads the save from <paramref name="savefile"/>.</summary>
    /// <param name="savefile">Readable stream positioned at the start of the save.</param>
    public Parser(Stream savefile)
    {
        _saveFile = savefile;
    }

    /// <summary>Kinds of tokens produced by <see cref="Lex"/>.</summary>
    protected enum TokenType : byte
    {
        Invalid,
        String,
        Equals,
        BracketOpen,
        BracketClose,
    }

    /// <summary>A lexed token together with the position where it starts.</summary>
    protected struct Token
    {
        public TokenType type;
        public short column;
        public int line;
        public string? value;

        /// <summary>Formats the token as <c>line:column 'text'</c> for diagnostics.</summary>
        /// <exception cref="ArgumentOutOfRangeException">The token type is not a defined enum member.</exception>
        public override string ToString()
        {
            string s = type switch
            {
                TokenType.Invalid => "INVALID_TOKEN",
                TokenType.String => value ?? "NULL",
                TokenType.Equals => "=",
                TokenType.BracketOpen => "{",
                TokenType.BracketClose => "}",
                _ => throw new ArgumentOutOfRangeException(type.ToString()),
            };
            return $"{line}:{column} '{s}'";
        }
    }

    /// <summary>
    /// Reads the whole stream and fills the token list.
    /// </summary>
    /// <remarks>
    /// Inside double quotes every byte (whitespace, <c>=</c>, <c>{</c>, <c>}</c>, <c>#</c>,
    /// line breaks) is part of the string. Outside quotes a <c>#</c> that starts a token
    /// begins a comment that runs to the end of the line.
    /// </remarks>
    /// <exception cref="InvalidDataException">
    /// The header does not match, or a quoted string is still open at end of stream.
    /// </exception>
    /// <exception cref="EndOfStreamException">The stream is shorter than the header.</exception>
    protected void Lex()
    {
        _tokens.Clear();

        byte[] headBytes = new byte[ExpectedHeader.Length];
        _saveFile.ReadExactly(headBytes);
        string headStr = Encoding.UTF8.GetString(headBytes);
        if (headStr != ExpectedHeader)
            throw new InvalidDataException($"Invalid gamestate header: '{headStr}'");

        StringBuilder str = new();
        // The header occupies the start of line 1; the newline that follows it moves us to line 2.
        int line = 1;
        int column = ExpectedHeader.Length;
        int tokenLine = line;
        int tokenColumn = column;
        bool isQuoteOpen = false;   // currently between an opening and a closing quote
        bool isStrInQuotes = false; // pending token contained a quoted part (so it may be empty)

        // Token.column is a short; clamp instead of wrapping around on very long lines.
        static short ClampColumn(int value) => (short)Math.Min(value, short.MaxValue);

        // Remembers where the pending string token starts.
        void BeginTokenIfNeeded()
        {
            if (str.Length == 0 && !isStrInQuotes)
            {
                tokenLine = line;
                tokenColumn = column;
            }
        }

        // Emits the pending string token, if there is one.
        void CompleteStringToken()
        {
            // strings in quotes can be empty
            if (!isStrInQuotes && str.Length == 0)
                return;

            _tokens.Add(new Token
            {
                type = TokenType.String,
                column = ClampColumn(tokenColumn),
                line = tokenLine,
                value = str.ToString()
            });
            str.Clear();
            isStrInQuotes = false;
        }

        // Emits the pending string token followed by a single-character structural token.
        void AddSymbol(TokenType symbol)
        {
            CompleteStringToken();
            _tokens.Add(new Token
            {
                type = symbol,
                line = line,
                column = ClampColumn(column)
            });
        }

        while (true)
        {
            int c = _saveFile.ReadByte();
            column++;

            if (c == -1)
            {
                if (isQuoteOpen)
                    throw new InvalidDataException(
                        $"Unterminated quoted string starting at {tokenLine}:{tokenColumn}");
                CompleteStringToken();
                return;
            }

            if (isQuoteOpen)
            {
                // Everything up to the closing quote is literal content.
                if (c == '"')
                {
                    isQuoteOpen = false;
                }
                else
                {
                    str.Append((char)c);
                    if (c == '\n')
                    {
                        line++;
                        column = 0;
                    }
                }
                continue;
            }

            switch (c)
            {
                case '"':
                    BeginTokenIfNeeded();
                    isQuoteOpen = true;
                    isStrInQuotes = true;
                    break;
                case ' ':
                case '\t':
                case '\r':
                    CompleteStringToken();
                    break;
                case '\n':
                    CompleteStringToken();
                    line++;
                    column = 0;
                    break;
                case '=':
                    AddSymbol(TokenType.Equals);
                    break;
                case '{':
                    AddSymbol(TokenType.BracketOpen);
                    break;
                case '}':
                    AddSymbol(TokenType.BracketClose);
                    break;
                case '#':
                {
                    // A '#' in the middle of a token is ordinary content.
                    if (str.Length > 0 || isStrInQuotes)
                    {
                        str.Append('#');
                        break;
                    }

                    // A '#' that starts a token begins a comment: skip to end of line.
                    int skipped;
                    do
                    {
                        skipped = _saveFile.ReadByte();
                    }
                    while (skipped != -1 && skipped != '\n');

                    if (skipped == -1)
                        return; // nothing is pending here, see the check above

                    line++;
                    column = 0;
                    break;
                }
                default:
                    BeginTokenIfNeeded();
                    str.Append((char)c);
                    break;
            }
        }
    }

    /// <summary>Thrown when the parser meets a token that is not valid at its position.</summary>
    protected class UnexpectedTokenException : Exception
    {
        public UnexpectedTokenException(Token token, int tokenIndex) :
            base($"Unexpected token at index {tokenIndex}: {token}")
        {}
    }

    /// <summary>Consumes and returns the next token.</summary>
    /// <exception cref="InvalidDataException">There are no tokens left.</exception>
    private Token NextToken()
    {
        if (_tokenIndex >= _tokens.Count)
        {
            string where = _tokens.Count > 0 ? $" after token {_tokens[^1]}" : string.Empty;
            throw new InvalidDataException($"Unexpected end of save file{where}");
        }
        return _tokens[_tokenIndex++];
    }

    /// <summary>
    /// Parses one value: a string, or a bracketed list/dictionary.
    /// </summary>
    /// <returns>The value, or <c>null</c> when the consumed token was a closing bracket.</returns>
    private object? ParseValue()
    {
        Token tok = NextToken();
        switch (tok.type)
        {
            case TokenType.String:
                return tok.value!;
            case TokenType.BracketOpen:
                return ParseListOrDict();
            case TokenType.BracketClose:
                return null;
            default:
                throw new UnexpectedTokenException(tok, _tokenIndex - 1);
        }
    }

    /// <summary>
    /// Parses the body of a block whose opening bracket was already consumed.
    /// The block is a dictionary when it starts with <c>key =</c>, otherwise a list.
    /// </summary>
    private object ParseListOrDict()
    {
        // Look ahead two tokens, but only if both exist (a file may end with `{}`).
        bool startsWithKeyValue =
            _tokenIndex + 1 < _tokens.Count
            && _tokens[_tokenIndex].type == TokenType.String
            && _tokens[_tokenIndex + 1].type == TokenType.Equals;

        if (startsWithKeyValue)
            return ParseDict();
        return ParseList();
    }

    /// <summary>Parses values until the closing bracket of the current block.</summary>
    private List<object> ParseList()
    {
        List<object> list = new();
        while (true)
        {
            // ParseValue returns null for the closing bracket.
            object? value = ParseValue();
            if (value == null)
                break;
            list.Add(value);
        }
        return list;
    }

    /// <summary>Skips a block without a key; its opening bracket was already consumed.</summary>
    private void SkipBlock()
    {
        int bracketBalance = 1;
        while (bracketBalance != 0)
        {
            Token tok = NextToken();
            if (tok.type == TokenType.BracketOpen)
                bracketBalance++;
            else if (tok.type == TokenType.BracketClose)
                bracketBalance--;
        }
    }

    /// <summary>
    /// Parses <c>key = value</c> pairs until a closing bracket or the end of the tokens.
    /// </summary>
    /// <returns>Every key mapped to the list of values assigned to it, in order of appearance.</returns>
    private Dictionary<string, List<object>> ParseDict()
    {
        Dictionary<string, List<object>> dict = new();
        // root is a dict without closing bracket, so this method must check _tokenIndex < _tokens.Count
        while (_tokenIndex < _tokens.Count)
        {
            Token tok = _tokens[_tokenIndex++];
            // end of dictionary
            if (tok.type == TokenType.BracketClose)
                break;

            // Saves may contain some blocks without a key.
            // Such blocks are skipped because there is no obvious place to store them.
            // Example: `technology_group=tech_cannorian{ }
            // { } { } { }`
            if (tok.type == TokenType.BracketOpen)
            {
                SkipBlock();
                continue;
            }

            if (tok.type != TokenType.String)
                throw new UnexpectedTokenException(tok, _tokenIndex - 1);
            string key = tok.value!;

            tok = NextToken();
            if (tok.type == TokenType.BracketOpen)
            {
                // Saves may contain key-value definition without `=`.
                // Example: `map_area_data{` instead of `map_area_data = {`
                _tokenIndex--;
            }
            else if (tok.type != TokenType.Equals)
            {
                throw new UnexpectedTokenException(tok, _tokenIndex - 1);
            }

            // A closing bracket is not a valid value for a key.
            object? value = ParseValue();
            if (value == null)
                throw new UnexpectedTokenException(_tokens[_tokenIndex - 1], _tokenIndex - 1);

            if (!dict.TryGetValue(key, out List<object>? list))
            {
                list = new List<object>();
                dict.Add(key, list);
            }
            list.Add(value);
        }
        return dict;
    }

    /// <summary>Lexes the stream and parses it as the root dictionary.</summary>
    /// <returns>The root dictionary of the save.</returns>
    /// <exception cref="InvalidDataException">
    /// The header is wrong, the save contains no tokens, or it ends unexpectedly.
    /// </exception>
    public Dictionary<string, List<object>> Parse()
    {
        Lex();
        if (_tokens.Count == 0)
            throw new InvalidDataException("Save file is empty");
        _tokenIndex = 0;
        return ParseDict();
    }
}