ParadoxSaveParser/ParadoxSaveParser.Lib/SaveParserEU4.cs

348 lines
11 KiB
C#

global using System;
global using System.Collections.Generic;
global using System.IO;
global using System.Text;
namespace ParadoxSaveParser.Lib;
/// <summary>
/// Sequential parser that doesn't cache anything.
/// </summary>
public class SaveParserEU4
{
protected Stream _saveFile;
private ISearchExpression? _searchExprCurrent;
private readonly BufferedEnumerator<Token> _tokens;
/// <summary>
/// Creates a parser over an uncompressed text <c>gamestate</c> stream.
/// Nothing is read from the stream by this constructor itself; the lexer is a lazy iterator.
/// </summary>
/// <param name="savefile">
/// Uncompressed stream of the <c>gamestate</c> file, which can be extracted from the save archive.
/// </param>
/// <param name="query">
/// Parsing a whole save takes about 10 seconds on a mid-range PC and about 1 GB of RAM,
/// so you should specify exactly what you want to get from the save file.
/// When <c>null</c>, no key is filtered out and everything is parsed.
/// </param>
/// <exception cref="ArgumentNullException"><paramref name="savefile"/> is <c>null</c>.</exception>
public SaveParserEU4(Stream savefile, ISearchExpression? query)
{
    ArgumentNullException.ThrowIfNull(savefile);

    // Assign the stream first: the lexer reads _saveFile, so the object must be fully
    // initialised before the token buffer gets a chance to pull from it.
    _saveFile = savefile;
    _searchExprCurrent = query;
    // NOTE(review): the second argument is presumably the look-ahead buffer size;
    // ParseListOrDict peeks two tokens ahead, so it must be at least that large - confirm.
    _tokens = new BufferedEnumerator<Token>(LexTextSave(), 5);
}
/// <summary>
/// Lazily splits the text <c>gamestate</c> stream into tokens.
/// </summary>
/// <remarks>
/// <para>The stream must start with the 6-byte header <c>EU4txt</c>.</para>
/// <para>
/// Bytes are widened to <see cref="char"/> one-to-one (no multi-byte decoding).
/// Bytes below 0x20 never become part of a string value.
/// </para>
/// <para>
/// Everything between double quotes is taken literally, including spaces and
/// <c>=</c>, <c>{</c>, <c>}</c>. A quoted string may be empty.
/// A <c>#</c> at the start of an unquoted token starts a comment that runs to the end of the line.
/// </para>
/// <para>
/// Lines and columns are 1-based; the header occupies line 1.
/// Columns are clamped to the range of <see cref="short"/>.
/// </para>
/// </remarks>
/// <returns>Enumerator that yields tokens in stream order.</returns>
/// <exception cref="Exception">The stream does not start with the expected header.</exception>
protected IEnumerator<Token> LexTextSave()
{
    string expectedHeader = "EU4txt";
    byte[] headBytes = new byte[expectedHeader.Length];
    _saveFile.ReadExactly(headBytes);
    string headStr = Encoding.UTF8.GetString(headBytes);
    if (headStr != expectedHeader)
        throw new Exception($"Invalid gamestate header. Expected '{expectedHeader}', got '{headStr}'.");

    StringBuilder str = new();
    // The header has been consumed, but the newline that ends its line has not,
    // so the position is still on line 1, right after the header.
    int line = 1;
    int column = expectedHeader.Length;
    bool isQuoteOpen = false;
    bool isStrInQuotes = false;
    bool isComment = false;
    Token strToken = new()
    {
        type = TokenType.Invalid,
        column = -1,
        line = -1
    };

    // Token.column is a short; clamp instead of letting the cast wrap around.
    static short ToColumn(int value) => (short)Math.Clamp(value, 0, short.MaxValue);

    // Turns the accumulated characters into strToken. Returns false when there is nothing to emit.
    bool TryCompleteStringToken()
    {
        if (isQuoteOpen)
            return false;
        // strings in quotes may be empty
        if (!isStrInQuotes && str.Length == 0)
            return false;
        strToken = new Token
        {
            type = TokenType.StringOrNumber,
            column = ToColumn(column - str.Length),
            line = line,
            value = str.ToString()
        };
        str.Clear();
        isStrInQuotes = false;
        return true;
    }

    while (_saveFile.CanRead)
    {
        int c = _saveFile.ReadByte();
        column++;

        // end of stream
        if (c == -1)
        {
            if (TryCompleteStringToken())
                yield return strToken;
            yield break;
        }

        // Inside a comment everything up to the end of the line is ignored.
        if (isComment)
        {
            if (c == '\n')
            {
                isComment = false;
                line++;
                column = 0;
            }
            continue;
        }

        // Inside quotes every printable character belongs to the string,
        // even the ones that are delimiters elsewhere.
        if (isQuoteOpen && c != '"')
        {
            if (c == '\n')
            {
                line++;
                column = 0;
            }
            else if (c >= 0x20)
            {
                str.Append((char)c);
            }
            continue;
        }

        switch (c)
        {
            case '"':
                isQuoteOpen = !isQuoteOpen;
                isStrInQuotes = true;
                break;
            // '#' in the middle of a token is kept as an ordinary character
            case '#' when str.Length == 0 && !isStrInQuotes:
                isComment = true;
                break;
            case ' ':
            case '\t':
            case '\r':
                if (TryCompleteStringToken())
                    yield return strToken;
                break;
            case '\n':
                if (TryCompleteStringToken())
                    yield return strToken;
                line++;
                column = 0;
                break;
            case '=':
                if (TryCompleteStringToken())
                    yield return strToken;
                yield return new Token
                {
                    type = TokenType.Equals,
                    line = line,
                    column = ToColumn(column)
                };
                break;
            case '{':
                if (TryCompleteStringToken())
                    yield return strToken;
                yield return new Token
                {
                    type = TokenType.BracketOpen,
                    line = line,
                    column = ToColumn(column)
                };
                break;
            case '}':
                if (TryCompleteStringToken())
                    yield return strToken;
                yield return new Token
                {
                    type = TokenType.BracketClose,
                    line = line,
                    column = ToColumn(column)
                };
                break;
            default:
                // Skip control characters, which are invisible and cause frontend bugs.
                if (c >= 0x20)
                    str.Append((char)c);
                break;
        }
    }
}
/// <summary>
/// Converts the value that starts at the current token into an object.
/// Does not advance past a scalar token; for <c>{</c> the nested list or dictionary is parsed.
/// </summary>
/// <returns>
/// <list type="bullet">
/// <item>a <see cref="string"/> for text, including the empty string;</item>
/// <item>a <see cref="double"/> for a token that starts with <c>-</c> or a digit, contains <c>.</c> and parses as a number;</item>
/// <item>a <see cref="long"/> for a token that starts with <c>-</c> or a digit and parses as an integer;</item>
/// <item>the result of <c>ParseListOrDict</c> for an opening bracket;</item>
/// <item><c>null</c> when the current token is a closing bracket.</item>
/// </list>
/// </returns>
/// <exception cref="UnexpectedTokenException">The current token cannot start a value.</exception>
private object? ParseValue()
{
    var tok = _tokens.Current.Value;
    switch (tok.type)
    {
        case TokenType.StringOrNumber:
            if (string.IsNullOrEmpty(tok.value))
                return string.Empty;
            if (tok.value[0] != '-' && !char.IsDigit(tok.value[0]))
                return tok.value;
            // Save files always use '.' as the decimal separator, so numbers must be parsed
            // with the invariant culture; otherwise the result depends on the machine's locale.
            var invariant = System.Globalization.CultureInfo.InvariantCulture;
            if (tok.value.Contains('.')
                && double.TryParse(
                    tok.value,
                    System.Globalization.NumberStyles.Float | System.Globalization.NumberStyles.AllowThousands,
                    invariant,
                    out double d))
                return d;
            if (long.TryParse(tok.value, System.Globalization.NumberStyles.Integer, invariant, out long l))
                return l;
            // looks numeric but is not a number (for example a dotted date): keep it as text
            return tok.value;
        case TokenType.BracketOpen:
            return ParseListOrDict();
        case TokenType.BracketClose:
            return null;
        default:
            throw new UnexpectedTokenException(tok);
    }
}
/// <summary>
/// Skips the value that starts at the current token without building any objects.
/// A scalar is left as the current token; a bracketed block is consumed by <c>SkipObject</c>.
/// </summary>
/// <returns>true if a value was skipped, false if the current token is a closing bracket</returns>
private bool SkipValue()
{
    switch (_tokens.Current.Value.type)
    {
        case TokenType.BracketOpen:
            SkipObject();
            return true;
        case TokenType.BracketClose:
            return false;
        default:
            return true;
    }
}
/// <summary>
/// Skips all tokens inside a curly-braces block.
/// Stops right after the bracket that brings the balance to zero, or when the tokens run out.
/// </summary>
/// <param name="bracketBalance">Number of brackets that are currently open.</param>
private void SkipObject(int bracketBalance = 1)
{
    while (bracketBalance != 0)
    {
        if (!_tokens.MoveNext())
            return;

        switch (_tokens.Current.Value.type)
        {
            case TokenType.BracketOpen:
                bracketBalance++;
                break;
            case TokenType.BracketClose:
                bracketBalance--;
                break;
        }
    }
}
/// <summary>
/// Decides whether the block that opens at the current token is a dictionary or a list,
/// then parses it. The decision itself only peeks ahead and does not advance.
/// </summary>
/// <returns>
/// The result of <c>ParseDict</c> when the next two tokens are a string followed by <c>=</c>,
/// otherwise the result of <c>ParseList</c>.
/// </returns>
private object ParseListOrDict()
{
    var afterBracket = _tokens.Current.Next;
    var afterKey = afterBracket?.Next;

    bool startsWithKey = afterBracket?.Value.type == TokenType.StringOrNumber;
    bool keyIsAssigned = afterKey?.Value.type == TokenType.Equals;

    if (!startsWithKey || !keyIsAssigned)
        return ParseList();
    return ParseDict();
}
/// <summary>
/// Parses values until the closing bracket of the list. Advances the token stream;
/// the closing bracket is the current token on return.
/// </summary>
/// <returns>The parsed values in the order they appear.</returns>
/// <exception cref="Exception">The tokens ran out before the closing bracket.</exception>
private List<object> ParseList()
{
    var items = new List<object>();
    while (_tokens.MoveNext())
    {
        // ParseValue returns null only for the closing bracket
        if (ParseValue() is not { } item)
            return items;
        items.Add(item);
    }

    throw new Exception("Unexpected end of file");
}
/// <summary>
/// Parses <c>key = value</c> entries until the closing bracket or the end of the tokens.
/// Advances the token stream.
/// </summary>
/// <remarks>
/// A key may occur several times, so every key maps to the list of its values in order.
/// While a search expression is active, entries it does not match are skipped, and the
/// expression it hands back is used for the nested value.
/// </remarks>
/// <returns>Dictionary from key to all values parsed for that key.</returns>
/// <exception cref="UnexpectedTokenException">
/// A token is not allowed at its position, or the tokens run out in the middle of an entry.
/// </exception>
private Dictionary<string, List<object>> ParseDict()
{
    var entries = new Dictionary<string, List<object>>();
    // Counts every loop pass, including keyless blocks and skipped entries.
    int entryIndex = -1;

    // The root is a dict without a closing bracket, so running out of tokens also ends the loop.
    while (_tokens.MoveNext())
    {
        entryIndex++;
        var keyToken = _tokens.Current.Value;
        switch (keyToken.type)
        {
            // end of dictionary
            case TokenType.BracketClose:
                return entries;
            // Saves may contain some blocks without a key.
            // Such blocks are skipped because there is no obvious place to put them.
            // Example: `technology_group=tech_cannorian{ }
            // { } { } { }`
            case TokenType.BracketOpen:
                SkipObject();
                continue;
            case TokenType.StringOrNumber:
                break;
            default:
                throw new UnexpectedTokenException(keyToken);
        }

        string key = keyToken.value!;

        // the token after the key should be `=` or `{`
        if (!_tokens.MoveNext())
            throw new UnexpectedTokenException(keyToken);
        var separator = _tokens.Current.Value;
        switch (separator.type)
        {
            case TokenType.Equals:
                // step over `=` so that the value becomes the current token
                if (!_tokens.MoveNext())
                    throw new UnexpectedTokenException(separator);
                break;
            // Saves may contain an object definition without `=`.
            // Example: `map_area_data {` instead of `map_area_data = {`
            case TokenType.BracketOpen:
                break;
            default:
                throw new UnexpectedTokenException(separator);
        }

        var outerQuery = _searchExprCurrent;
        ISearchExpression? innerQuery = null;
        bool isWanted = outerQuery is null
                        || outerQuery.DoesMatch(new SearchArgs(key, entryIndex), out innerQuery);
        if (!isWanted)
        {
            SkipValue();
            continue;
        }

        // the nested value is parsed under the expression returned by the match
        _searchExprCurrent = innerQuery;
        object? parsed = ParseValue();
        if (parsed is null)
            throw new UnexpectedTokenException(_tokens.Current.Value);
        _searchExprCurrent = outerQuery;

        if (!entries.TryGetValue(key, out var bucket))
        {
            bucket = new List<object>();
            entries.Add(key, bucket);
        }
        bucket.Add(parsed);
    }

    return entries;
}
/// <summary>
/// Parses the save from the current position of the token stream as the root dictionary.
/// </summary>
/// <returns>Root dictionary that maps every parsed key to the list of its values.</returns>
public Dictionary<string, List<object>> Parse() => ParseDict();
/// <summary>Kinds of tokens produced by the lexer.</summary>
protected enum TokenType : byte
{
    /// <summary>Placeholder for a token that has not been filled in.</summary>
    Invalid = 0,

    /// <summary>Quoted or unquoted text; numbers are told apart later, during parsing.</summary>
    StringOrNumber = 1,

    /// <summary>The <c>=</c> character.</summary>
    Equals = 2,

    /// <summary>The <c>{</c> character.</summary>
    BracketOpen = 3,

    /// <summary>The <c>}</c> character.</summary>
    BracketClose = 4
}
/// <summary>A single lexical token together with its position in the save file.</summary>
protected struct Token
{
    /// <summary>Kind of the token.</summary>
    public required TokenType type;

    /// <summary>Column reported by the lexer for this token.</summary>
    public required short column;

    /// <summary>Line reported by the lexer for this token.</summary>
    public required int line;

    /// <summary>Text of a <see cref="TokenType.StringOrNumber"/> token; not set for other kinds.</summary>
    public string? value;

    /// <summary>Formats the token as <c>line:column 'text'</c> for error messages.</summary>
    /// <exception cref="ArgumentOutOfRangeException"><see cref="type"/> is not a known token type.</exception>
    public override string ToString()
    {
        string text = type switch
        {
            TokenType.Invalid => "INVALID_TOKEN",
            TokenType.StringOrNumber => value ?? "NULL",
            TokenType.Equals => "=",
            TokenType.BracketOpen => "{",
            TokenType.BracketClose => "}",
            _ => throw new ArgumentOutOfRangeException(type.ToString())
        };
        return $"{line}:{column} '{text}'";
    }
}
/// <summary>
/// Thrown when the parser meets a token that is not allowed at its position.
/// </summary>
protected class UnexpectedTokenException : Exception
{
    /// <summary>Creates the exception with a message that contains the token's position and text.</summary>
    /// <param name="token">The offending token.</param>
    public UnexpectedTokenException(Token token)
        : base($"Unexpected token: {token}") { }
}
}