414 lines
14 KiB
C#
414 lines
14 KiB
C#
global using System;
|
|
global using System.Collections.Generic;
|
|
global using System.IO;
|
|
global using System.Text;
|
|
using Microsoft.Extensions.ObjectPool;
|
|
|
|
namespace ParadoxSaveParser.Lib;
|
|
|
|
/// <summary>
|
|
/// Sequential parser that doesn't cache anything.
|
|
/// </summary>
|
|
public class SaveParserEU4
|
|
{
|
|
/// <summary>Gamestate stream passed to the constructor; the lexer reads it byte by byte.</summary>
protected readonly Stream _saveFile;
|
|
/// <summary>Buffered token enumerator built over the tokens produced by <see cref="LexTextSave"/>.</summary>
private readonly BufferedEnumerator<Token> _tokens;
|
|
/// <summary>Pool of <see cref="StringBuilder"/> instances that hold the text of string/number tokens.</summary>
private readonly ObjectPool<StringBuilder> _stringBuilderPool;
|
|
/// <summary>
/// Search expression applied to the collection currently being parsed;
/// when it is <c>null</c> no entry is filtered out.
/// </summary>
private ISearchExpression? _searchExprCurrent;
|
|
|
|
/// <summary>
/// Creates a parser over an uncompressed EU4 text save.
/// </summary>
/// <param name="savefile">
/// Uncompressed stream of the <c>gamestate</c> file, which can be extracted from the save archive.
/// </param>
/// <param name="query">
/// Parsing a whole save takes about 10 seconds on a mid-range PC and about 1 GB of RAM,
/// so you should specify exactly what you want to get from the save file.
/// When <c>null</c>, nothing is filtered out.
/// </param>
/// <exception cref="ArgumentNullException"><paramref name="savefile"/> is <c>null</c>.</exception>
public SaveParserEU4(Stream savefile, ISearchExpression? query)
{
    ArgumentNullException.ThrowIfNull(savefile);

    _saveFile = savefile;
    _searchExprCurrent = query;

    const int tokenBufSize = 5;
    const int builderCapacity = tokenBufSize * 13;

    // The pool has to exist before the token enumerator is created: the lexer rents
    // builders from it as soon as the enumerator is advanced, which a buffering
    // enumerator may already do while it is being constructed.
    _stringBuilderPool = new DefaultObjectPool<StringBuilder>(
        new StringBuilderPooledObjectPolicy
        {
            InitialCapacity = builderCapacity,
            MaximumRetainedCapacity = builderCapacity,
        });
    _tokens = new BufferedEnumerator<Token>(LexTextSave(), tokenBufSize);
}
|
|
|
|
/// <summary>
/// Lazily splits the text save into tokens, reading the stream one byte at a time.
/// </summary>
/// <remarks>
/// <para>The stream must start with the <c>EU4txt</c> header.</para>
/// <para>
/// Text between double quotes is kept verbatim: spaces and the characters
/// <c>=</c>, <c>{</c>, <c>}</c> and <c>#</c> are part of the string there.
/// Control characters (below 0x20) are always dropped.
/// </para>
/// <para>
/// Outside quotes a <c>#</c> at the start of a token begins a comment that runs to the end of the line.
/// </para>
/// <para>
/// The <see cref="StringBuilder"/> stored in a string token comes from the pool;
/// the consumer of the token is responsible for returning it.
/// </para>
/// </remarks>
/// <returns>Enumerator over the tokens of the save file.</returns>
/// <exception cref="InvalidDataException">The stream does not start with the expected header.</exception>
protected IEnumerator<Token> LexTextSave()
{
    const string expectedHeader = "EU4txt";
    byte[] headBytes = new byte[expectedHeader.Length];
    _saveFile.ReadExactly(headBytes);
    string headStr = Encoding.UTF8.GetString(headBytes);
    if (headStr != expectedHeader)
        throw new InvalidDataException($"Invalid gamestate header. Expected '{expectedHeader}', got '{headStr}'.");

    StringBuilder strb = _stringBuilderPool.Get();
    // NOTE(review): the header sits on line 1 and its trailing '\n' increments this once more,
    // so reported line numbers may be one too high - confirm the intended numbering base.
    int line = 2;
    int column = 0;
    bool isQuoteOpen = false;   // currently between an opening and a closing quote
    bool isStrInQuotes = false; // the pending token contained quotes, so it may legally be empty
    bool isInComment = false;   // skipping everything up to the end of the line
    Token strToken = new()
    {
        type = TokenType.Invalid,
        column = -1,
        line = -1,
        value = null,
    };

    // Token.column is a short; clamp instead of letting very long lines wrap around.
    static short ToColumn(int value)
        => value < 0 ? (short)0 : value > short.MaxValue ? short.MaxValue : (short)value;

    // Moves the pending text into strToken and rents a fresh builder.
    // Returns false when there is nothing to emit.
    bool TryCompleteStringToken()
    {
        if (isQuoteOpen)
            return false;

        // strings in quotes may be empty
        if (!isStrInQuotes && strb.Length == 0)
            return false;

        strToken = new Token
        {
            type = TokenType.StringOrNumber,
            column = ToColumn(column - strb.Length),
            line = line,
            value = strb,
        };
        strb = _stringBuilderPool.Get();
        isStrInQuotes = false;
        return true;
    }

    Token PunctuationToken(TokenType tokenType) => new Token
    {
        type = tokenType,
        line = line,
        column = ToColumn(column),
    };

    while (_saveFile.CanRead)
    {
        int c = _saveFile.ReadByte();
        column++;

        // end of stream
        if (c == -1)
        {
            if (TryCompleteStringToken())
                yield return strToken;
            _stringBuilderPool.Return(strb);
            yield break;
        }

        if (isInComment)
        {
            if (c == '\n')
            {
                isInComment = false;
                line++;
                column = 0;
            }
            continue;
        }

        // Inside quotes only the closing quote is special; everything else is string content.
        if (isQuoteOpen && c != '"')
        {
            if (c == '\n')
            {
                line++;
                column = 0;
            }
            else if (c >= 0x20)
            {
                strb.Append((char)c);
            }
            continue;
        }

        switch (c)
        {
            case '"':
                isQuoteOpen = !isQuoteOpen;
                isStrInQuotes = true;
                break;
            case ' ':
            case '\t':
            case '\r':
                if (TryCompleteStringToken())
                    yield return strToken;
                break;
            case '\n':
                if (TryCompleteStringToken())
                    yield return strToken;
                line++;
                column = 0;
                break;
            case '=':
                if (TryCompleteStringToken())
                    yield return strToken;
                yield return PunctuationToken(TokenType.Equals);
                break;
            case '{':
                if (TryCompleteStringToken())
                    yield return strToken;
                yield return PunctuationToken(TokenType.BracketOpen);
                break;
            case '}':
                if (TryCompleteStringToken())
                    yield return strToken;
                yield return PunctuationToken(TokenType.BracketClose);
                break;
            default:
                if (c == '#' && strb.Length == 0 && !isStrInQuotes)
                {
                    // A token that starts with '#' is a comment; drop the rest of the line.
                    isInComment = true;
                }
                else if (c >= 0x20)
                {
                    // Control characters are invisible and cause frontend bugs, so they are skipped.
                    strb.Append((char)c);
                }
                break;
        }
    }

    _stringBuilderPool.Return(strb);
}
|
|
|
|
|
|
/// <summary>
/// Converts the current token into a value. The token position is not advanced here,
/// except by the nested parsing of a collection that starts at an opening bracket.
/// </summary>
/// <remarks>
/// Numbers are parsed with the invariant culture, so the result does not depend on the
/// regional settings of the machine. The <see cref="StringBuilder"/> of a string token
/// is always returned to the pool.
/// </remarks>
/// <returns>
/// A <see cref="string"/>, <see cref="double"/> or <see cref="long"/> for a string/number token,
/// the parsed list or dictionary for an opening bracket,
/// or <c>null</c> when the current token is a closing bracket.
/// </returns>
/// <exception cref="UnexpectedTokenException">The current token cannot start a value.</exception>
private object? ParseValue()
{
    var tok = _tokens.Current.Value;
    switch (tok.type)
    {
        case TokenType.StringOrNumber:
            StringBuilder text = tok.value!;
            // Copy the text first: returning the builder to the pool clears it.
            string tokStr = text.ToString();
            _stringBuilderPool.Return(text);

            // string values can be empty
            if (tokStr.Length == 0)
                return string.Empty;

            if (tokStr[0] != '-' && !char.IsDigit(tokStr[0]))
                return tokStr;

            // Save files always use '.' as the decimal separator and '-' as the sign,
            // so the current culture must not take part in parsing.
            var invariant = System.Globalization.CultureInfo.InvariantCulture;
            if (tokStr.Contains('.')
                && double.TryParse(
                    tokStr,
                    System.Globalization.NumberStyles.Float | System.Globalization.NumberStyles.AllowThousands,
                    invariant,
                    out double d))
                return d;
            if (long.TryParse(tokStr, System.Globalization.NumberStyles.Integer, invariant, out long l))
                return l;

            // Looks like a number but is not one (for example a date such as 1444.11.11).
            return tokStr;
        case TokenType.BracketOpen:
            return ParseListOrDict();
        case TokenType.BracketClose:
            return null;
        default:
            throw new UnexpectedTokenException(tok);
    }
}
|
|
|
|
|
|
/// <summary>
/// Discards the value that starts at the current token without building it.
/// </summary>
/// <returns>true if a value was skipped, false if the current token is a closing bracket</returns>
/// <exception cref="UnexpectedTokenException">The current token cannot start a value.</exception>
private bool SkipValue()
{
    var current = _tokens.Current.Value;

    if (current.type == TokenType.BracketClose)
        return false;

    if (current.type == TokenType.StringOrNumber)
    {
        // the text is not needed, so its builder goes straight back to the pool
        _stringBuilderPool.Return(current.value!);
        return true;
    }

    if (current.type == TokenType.BracketOpen)
    {
        SkipObject();
        return true;
    }

    throw new UnexpectedTokenException(current);
}
|
|
|
|
/// <summary>
/// Consumes tokens until the curly-brace block that is currently open has been closed
/// or the token stream ends.
/// </summary>
/// <param name="bracketBalance">Number of brackets that are open when the method is entered.</param>
private void SkipObject(int bracketBalance = 1)
{
    while (bracketBalance != 0)
    {
        if (!_tokens.MoveNext())
            return;

        var current = _tokens.Current.Value;
        switch (current.type)
        {
            case TokenType.BracketOpen:
                bracketBalance++;
                break;
            case TokenType.BracketClose:
                bracketBalance--;
                break;
            case TokenType.StringOrNumber:
                // skipped text is never read, so its builder is recycled immediately
                _stringBuilderPool.Return(current.value!);
                break;
        }
    }
}
|
|
|
|
/// <summary>
/// Tells whether <paramref name="value"/> is a parsed dictionary or list that has no elements.
/// </summary>
/// <param name="value">Value to inspect.</param>
/// <returns>
/// true for an empty <c>Dictionary&lt;string, object&gt;</c> or an empty <c>List&lt;object&gt;</c>,
/// false for anything else.
/// </returns>
private static bool IsEmptyCollection(object value)
{
    return value switch
    {
        Dictionary<string, object> dict => dict.Count == 0,
        List<object> list => list.Count == 0,
        _ => false,
    };
}
|
|
|
|
/// <summary>
/// Decides whether the block that opens at the current token is a dictionary or a list
/// and parses it accordingly. The decision itself does not advance the token position:
/// it only looks at the two tokens that follow the current one.
/// </summary>
/// <returns>
/// A dictionary when the block starts with <c>key =</c>, otherwise a list.
/// </returns>
private object ParseListOrDict()
{
    var afterBracket = _tokens.Current.Next;
    var afterKey = _tokens.Current.Next?.Next;

    // anything that does not begin with a string/number is a list
    if (afterBracket?.Value.type != TokenType.StringOrNumber)
        return ParseList();

    // a leading string/number that is not followed by `=` is a list element
    if (afterKey?.Value.type != TokenType.Equals)
        return ParseList();

    return ParseDict();
}
|
|
|
|
/// <summary>
/// Parses the elements of a list up to and including its closing bracket,
/// advancing the token position as it goes.
/// </summary>
/// <remarks>
/// Elements rejected by the current search expression are skipped,
/// and empty collections are not added to the result.
/// </remarks>
/// <returns>The elements that were kept.</returns>
/// <exception cref="Exception">The tokens ran out before the closing bracket.</exception>
private List<object> ParseList()
{
    var items = new List<object>();
    int index = 0;

    while (true)
    {
        if (!_tokens.MoveNext())
            throw new Exception("Unexpected end of file");

        ISearchExpression? innerExpr = null;
        bool isFilteredOut = _searchExprCurrent != null
            && !_searchExprCurrent.DoesMatch(new SearchArgs(index, string.Empty), out innerExpr);
        index++;

        if (isFilteredOut)
        {
            if (SkipValue())
                continue;
            // closing bracket: the list is finished
            break;
        }

        // nested values are matched against the expression returned for this element
        var outerExpr = _searchExprCurrent;
        _searchExprCurrent = innerExpr;
        object? parsed = ParseValue();
        _searchExprCurrent = outerExpr;

        // null means the closing bracket has been reached
        if (parsed is null)
            break;

        // do not add empty collections to the list
        if (!IsEmptyCollection(parsed))
            items.Add(parsed);
    }

    return items;
}
|
|
|
|
/// <summary>
/// Parses <c>key = value</c> pairs up to and including the closing bracket of the dictionary,
/// advancing the token position as it goes. The root of a save has no closing bracket,
/// so running out of tokens also ends the dictionary.
/// </summary>
/// <remarks>
/// <para>Entries rejected by the current search expression are skipped.</para>
/// <para>
/// A key that is defined more than once is turned into a list of its values
/// (<c>a = 1</c>, <c>a = 2</c> means <c>a = { 1 2 }</c>). A value that is itself a list is
/// kept as one element of that list instead of being merged into it.
/// Empty collections are never added as a repeated value.
/// </para>
/// </remarks>
/// <returns>The parsed dictionary.</returns>
/// <exception cref="UnexpectedTokenException">
/// A token appears where a key, <c>=</c>, <c>{</c> or a value was expected.
/// </exception>
private Dictionary<string, object> ParseDict()
{
    Dictionary<string, object> dict = new();

    // Keys whose entry in dict is an accumulator list created here for repeated definitions.
    // Needed to tell such a list apart from a value that was parsed as a list.
    HashSet<string>? repeatedKeys = null;

    for (int localIndex = 0; _tokens.MoveNext(); localIndex++)
    {
        var tok = _tokens.Current.Value;
        // end of dictionary
        if (tok.type == TokenType.BracketClose)
            break;

        // Saves may contain some blocks without a key.
        // Such blocks are skipped because there is no obvious place to put them.
        // Example: `technology_group=tech_cannorian{ }
        // { } { } { }`
        if (tok.type == TokenType.BracketOpen)
        {
            SkipObject();
            continue;
        }

        if (tok.type != TokenType.StringOrNumber)
            throw new UnexpectedTokenException(tok);

        var keySB = tok.value!;

        // next token should be `=` or `{`
        if (!_tokens.MoveNext())
            throw new UnexpectedTokenException(tok);
        tok = _tokens.Current.Value;
        if (tok.type == TokenType.Equals)
        {
            // skip `=`
            if (!_tokens.MoveNext())
                throw new UnexpectedTokenException(tok);
        }
        // Saves may contain an object definition without `=`.
        // Example: `map_area_data {` instead of `map_area_data = {`
        else if (tok.type != TokenType.BracketOpen)
        {
            throw new UnexpectedTokenException(tok);
        }

        ISearchExpression? searchExprNext = null;
        if (_searchExprCurrent != null
            && !_searchExprCurrent.DoesMatch(new SearchArgs(localIndex, keySB), out searchExprNext))
        {
            if (!SkipValue())
                throw new UnexpectedTokenException(_tokens.Current.Value);
            _stringBuilderPool.Return(keySB);
            continue;
        }

        // nested values are matched against the expression returned for this key
        var searchExprPrevious = _searchExprCurrent;
        _searchExprCurrent = searchExprNext;
        object? value = ParseValue();
        _searchExprCurrent = searchExprPrevious;
        if (value is null)
            throw new UnexpectedTokenException(_tokens.Current.Value);

        // Copy the key before returning its builder: the pool clears returned builders.
        string keyStr = keySB.ToString();
        _stringBuilderPool.Return(keySB);

        if (!dict.TryGetValue(keyStr, out var existing))
        {
            dict.Add(keyStr, value);
            continue;
        }

        // Paradox save format has another way of defining a list:
        // a = 1
        // a = 2
        // It means `a = { 1 2 }`

        // Do not add empty collections to the list.
        // `key:{}` is okay, but `key:[{},{},{},{},{},{}]` is just noise.
        if (IsEmptyCollection(value))
            continue;

        if (repeatedKeys is not null && repeatedKeys.Contains(keyStr))
        {
            ((List<object>)existing).Add(value);
            continue;
        }

        // First repetition of this key: start the accumulator list.
        // An empty placeholder stored by the first definition is dropped.
        dict[keyStr] = IsEmptyCollection(existing)
            ? new List<object> { value }
            : new List<object> { existing, value };
        (repeatedKeys ??= new HashSet<string>()).Add(keyStr);
    }

    return dict;
}
|
|
|
|
/// <summary>
/// Parses the save by treating the whole token stream as the root dictionary.
/// </summary>
/// <returns>The root dictionary produced by <see cref="ParseDict"/>.</returns>
public Dictionary<string, object> Parse() => ParseDict();
|
|
|
|
/// <summary>Kinds of tokens produced by the lexer.</summary>
protected enum TokenType : byte
{
    /// <summary>Default value; used for the placeholder token before any text has been lexed.</summary>
    Invalid = 0,

    /// <summary>Quoted or unquoted text; it may later be interpreted as a number.</summary>
    StringOrNumber = 1,

    /// <summary>The <c>=</c> character.</summary>
    Equals = 2,

    /// <summary>The <c>{</c> character.</summary>
    BracketOpen = 3,

    /// <summary>The <c>}</c> character.</summary>
    BracketClose = 4,
}
|
|
|
|
/// <summary>A lexical token together with its position in the save file.</summary>
protected struct Token
{
    /// <summary>Kind of the token.</summary>
    public required TokenType type;

    /// <summary>Column recorded by the lexer for this token.</summary>
    public required short column;

    /// <summary>Line recorded by the lexer for this token.</summary>
    public required int line;

    /// <summary>Text of a <see cref="TokenType.StringOrNumber"/> token; not set for other kinds.</summary>
    public StringBuilder? value;

    /// <summary>
    /// Formats the token as <c>line:column 'text'</c> for diagnostics.
    /// </summary>
    /// <exception cref="ArgumentOutOfRangeException">
    /// <see cref="type"/> is not a defined <see cref="TokenType"/> value.
    /// </exception>
    public override string ToString()
    {
        string text = type switch
        {
            TokenType.Invalid => "INVALID_TOKEN",
            // a missing or empty text is shown as NULL
            TokenType.StringOrNumber => value is { Length: > 0 } sb ? sb.ToString() : "NULL",
            TokenType.Equals => "=",
            TokenType.BracketOpen => "{",
            TokenType.BracketClose => "}",
            _ => throw new ArgumentOutOfRangeException(type.ToString()),
        };

        return $"{line}:{column} '{text}'";
    }
}
|
|
|
|
/// <summary>
/// Thrown when the parser meets a token that is not allowed at the current position.
/// The message contains the position and text of the token.
/// </summary>
protected class UnexpectedTokenException : Exception
{
    /// <param name="token">The offending token.</param>
    public UnexpectedTokenException(Token token)
        : base("Unexpected token: " + token.ToString())
    {
    }
}
|
|
} |