337 lines
11 KiB
C#
337 lines
11 KiB
C#
global using System;
|
|
global using System.Collections.Generic;
|
|
global using System.IO;
|
|
global using System.Text;
|
|
|
|
namespace ParadoxSaveParser.Lib;
|
|
|
|
/// <summary>
/// Sequential parser that doesn't cache anything.
/// </summary>
/// <remarks>
/// The save stream is lexed lazily, one byte at a time, and tokens are consumed
/// through a small look-ahead buffer. Only keys accepted by the search query are
/// materialised; everything else is skipped token by token.
/// </remarks>
public class SaveParserEU4
{
    protected Stream _saveFile;
    private BufferedEnumerator<Token> _tokens;
    private SearchExpression _query;
    // Nesting level of the block being parsed; the root dictionary is depth 0.
    private int _currentDepth;

    /// <param name="savefile">Uncompressed stream of <c>gamestate</c> file which can be extracted from save archive</param>
    /// <param name="query">Parsing whole save takes 10 seconds on mid pc and takes 1GB of RAM,
    /// so you should specify what exactly you want to get from save file</param>
    /// <exception cref="ArgumentNullException"><paramref name="savefile"/> or <paramref name="query"/> is null.</exception>
    public SaveParserEU4(Stream savefile, SearchExpression query)
    {
        ArgumentNullException.ThrowIfNull(savefile);
        ArgumentNullException.ThrowIfNull(query);

        // The lexer reads from _saveFile, so the field has to be set before the
        // token enumerator is handed to the buffer: if the buffer prefetches
        // tokens on construction, the lexer would otherwise see a null stream.
        _saveFile = savefile;
        _query = query;
        // NOTE(review): the second argument is presumably the look-ahead size;
        // ParseListOrDict peeks two tokens ahead - confirm against BufferedEnumerator.
        _tokens = new BufferedEnumerator<Token>(LexTextSave(), 5);
    }

    /// <summary>Kinds of tokens produced by <see cref="LexTextSave"/>.</summary>
    protected enum TokenType : byte
    {
        Invalid,
        StringOrNumber,
        Equals,
        BracketOpen,
        BracketClose,
    }

    /// <summary>A single lexical token together with its position in the save file.</summary>
    protected struct Token
    {
        public required TokenType type;
        public required short column;
        public required int line;
        // Text of the token; only set for TokenType.StringOrNumber.
        public string? value;

        /// <summary>Formats the token as <c>line:column 'text'</c> for diagnostics.</summary>
        /// <exception cref="ArgumentOutOfRangeException"><see cref="type"/> is not a known <see cref="TokenType"/>.</exception>
        public override string ToString()
        {
            string s = type switch
            {
                TokenType.Invalid => "INVALID_TOKEN",
                TokenType.StringOrNumber => value ?? "NULL",
                TokenType.Equals => "=",
                TokenType.BracketOpen => "{",
                TokenType.BracketClose => "}",
                _ => throw new ArgumentOutOfRangeException(type.ToString())
            };

            return $"{line}:{column} '{s}'";
        }
    }

    /// <summary>Thrown when the token stream does not match the expected save structure.</summary>
    protected class UnexpectedTokenException : Exception
    {
        public UnexpectedTokenException(Token token) :
            base($"Unexpected token: {token}")
        {}
    }

    /// <summary>
    /// Lazily splits the save stream into tokens.
    /// </summary>
    /// <remarks>
    /// Validates the <c>EU4txt</c> header first, then reads the stream byte by byte.
    /// Bytes are widened to chars one-to-one, so multi-byte encodings are not decoded.
    /// Text inside double quotes is kept verbatim (apart from control characters);
    /// outside quotes, whitespace and <c>= { }</c> terminate a string token, and a
    /// <c>#</c> at the start of a token begins a comment that lasts until end of line.
    /// </remarks>
    /// <returns>Enumerator over the tokens of the save file.</returns>
    /// <exception cref="Exception">The stream does not start with the expected header.</exception>
    protected IEnumerator<Token> LexTextSave()
    {
        const string expectedHeader = "EU4txt";
        byte[] headBytes = new byte[expectedHeader.Length];
        _saveFile.ReadExactly(headBytes);
        string headStr = Encoding.UTF8.GetString(headBytes);
        if (headStr != expectedHeader)
            throw new Exception($"Invalid gamestate header. Expected '{expectedHeader}', got '{headStr}'.");

        StringBuilder str = new();
        // Line counter starts at 2, presumably because the header occupies line 1.
        int line = 2;
        int column = 0;
        // True while the lexer is between an opening and a closing double quote.
        bool isQuoteOpen = false;
        // True when the string being accumulated contained a quoted part,
        // which allows it to be emitted even if it is empty.
        bool isStrInQuotes = false;
        // True from an unquoted '#' at the start of a token until the next '\n'.
        bool isComment = false;
        Token strToken = new()
        {
            type = TokenType.Invalid,
            column = -1,
            line = -1
        };

        // Moves the accumulated text into strToken.
        // Returns false when there is nothing to emit yet.
        bool TryCompleteStringToken()
        {
            if (isQuoteOpen)
                return false;

            // strings in quotes may be empty
            if (!isStrInQuotes && str.Length <= 0)
                return false;

            strToken = new Token
            {
                type = TokenType.StringOrNumber,
                column = (short)(column - str.Length),
                line = line,
                value = str.ToString()
            };
            str.Clear();
            isStrInQuotes = false;
            return true;
        }

        while (_saveFile.CanRead)
        {
            int c = _saveFile.ReadByte();
            column++;

            // end of stream
            if (c == -1)
            {
                if (TryCompleteStringToken())
                    yield return strToken;
                yield break;
            }

            // A line break always advances the position and ends a comment.
            // Inside an open quote it does not terminate the string.
            if (c == '\n')
            {
                isComment = false;
                if (TryCompleteStringToken())
                    yield return strToken;
                line++;
                column = 0;
                continue;
            }

            // Everything up to the end of line belongs to the comment.
            if (isComment)
                continue;

            // Inside quotes spaces and `= { } #` are ordinary characters.
            if (isQuoteOpen)
            {
                if (c == '\"')
                    isQuoteOpen = false;
                // Skip control characters, which are invisible and causing frontend bugs.
                else if (c >= 0x20)
                    str.Append((char)c);
                continue;
            }

            // '#' at the very start of an unquoted token begins a comment.
            if (c == '#' && str.Length == 0 && !isStrInQuotes)
            {
                isComment = true;
                continue;
            }

            switch (c)
            {
                case '\"':
                    isQuoteOpen = true;
                    isStrInQuotes = true;
                    break;
                case ' ':
                case '\t':
                case '\r':
                    if (TryCompleteStringToken())
                        yield return strToken;
                    break;
                case '=':
                case '{':
                case '}':
                    // A structural character ends the pending string token
                    // and is itself emitted as a token.
                    if (TryCompleteStringToken())
                        yield return strToken;
                    TokenType structural = c == '='
                        ? TokenType.Equals
                        : c == '{'
                            ? TokenType.BracketOpen
                            : TokenType.BracketClose;
                    yield return new Token
                    {
                        type = structural,
                        line = line,
                        column = (short)column
                    };
                    break;
                default:
                    // Skip control characters, which are invisible and causing frontend bugs.
                    // I dont know why there are so many of them in strings.
                    if (c >= 0x20)
                        str.Append((char)c);
                    break;
            }
        }
    }

    // doesn't move next
    /// <summary>Converts the current token into a value.</summary>
    /// <returns>
    /// A <see cref="double"/>, a <see cref="long"/> or a <see cref="string"/> for a scalar token,
    /// a list or dictionary for an opening bracket, or null for a closing bracket.
    /// </returns>
    /// <exception cref="UnexpectedTokenException">The current token cannot start a value.</exception>
    private object? ParseValue()
    {
        Token tok = _tokens.Current.Value;
        switch (tok.type)
        {
            case TokenType.StringOrNumber:
                if (string.IsNullOrEmpty(tok.value))
                    return string.Empty;
                // Only text starting with '-' or a digit can be a number.
                if (tok.value[0] != '-' && !char.IsDigit(tok.value[0]))
                    return tok.value;

                // Save files always use '.' as the decimal separator, so numbers are
                // parsed with the invariant culture instead of the machine's culture.
                var invariant = System.Globalization.CultureInfo.InvariantCulture;
                if (tok.value.Contains('.') && double.TryParse(
                        tok.value,
                        System.Globalization.NumberStyles.Float | System.Globalization.NumberStyles.AllowThousands,
                        invariant,
                        out double d))
                    return d;
                if (long.TryParse(tok.value, System.Globalization.NumberStyles.Integer, invariant, out long l))
                    return l;
                // Looks numeric but is not (for example a date such as 1444.11.11).
                return tok.value;
            case TokenType.BracketOpen:
                _currentDepth++;
                var obj = ParseListOrDict();
                _currentDepth--;
                return obj;
            case TokenType.BracketClose:
                return null;
            default:
                throw new UnexpectedTokenException(tok);
        }
    }

    // skips next value
    /// <summary>Skips the value at the current token without building it.</summary>
    /// <returns>true if skipped value, false if current token is closing bracket</returns>
    private bool SkipValue()
    {
        Token tok = _tokens.Current.Value;
        if (tok.type == TokenType.BracketOpen)
        {
            SkipObject();
            return true;
        }

        return tok.type != TokenType.BracketClose;
    }

    // skips all tokens inside curly braces block
    /// <summary>Advances the token stream until the brackets are balanced or the tokens run out.</summary>
    /// <param name="bracketBalance">Number of already opened brackets that still have to be closed.</param>
    private void SkipObject(int bracketBalance = 1)
    {
        while (bracketBalance != 0 && _tokens.MoveNext())
        {
            Token tok = _tokens.Current.Value;
            if (tok.type == TokenType.BracketOpen)
                bracketBalance++;
            else if (tok.type == TokenType.BracketClose)
                bracketBalance--;
        }
    }

    // doesn't move next
    /// <summary>
    /// Decides whether the block opened by the current token is a dictionary or a list
    /// by peeking at the two following tokens, then parses it.
    /// </summary>
    /// <returns>A dictionary when the block starts with <c>key =</c>, otherwise a list.</returns>
    private object ParseListOrDict()
    {
        var first = _tokens.Current.Next;
        var second = _tokens.Current.Next?.Next;
        if (first?.Value.type == TokenType.StringOrNumber && second?.Value.type == TokenType.Equals)
            return ParseDict();

        return ParseList();
    }

    // moves next
    /// <summary>Parses values until the closing bracket of the current block.</summary>
    /// <exception cref="Exception">The tokens ended before the closing bracket.</exception>
    private List<object> ParseList()
    {
        List<object> list = new();
        while (true)
        {
            if (!_tokens.MoveNext())
                throw new Exception("Unexpected end of file");
            // ParseValue returns null on the closing bracket
            object? value = ParseValue();
            if (value is null)
                break;
            list.Add(value);
        }
        return list;
    }

    // moves next
    /// <summary>
    /// Parses <c>key = value</c> pairs until the closing bracket or the end of the tokens.
    /// </summary>
    /// <remarks>
    /// Keys may repeat, so every key maps to the list of its values in file order.
    /// Pairs rejected by the search query are skipped without being materialised.
    /// </remarks>
    /// <exception cref="UnexpectedTokenException">The tokens do not form a valid pair.</exception>
    private Dictionary<string, List<object>> ParseDict()
    {
        Dictionary<string, List<object>> dict = new();

        // root is a dict without closing bracket, so the loop also ends when the tokens run out.
        // localIndex counts the entries seen in this dict, including skipped ones.
        for (int localIndex = 0; _tokens.MoveNext(); localIndex++)
        {
            Token tok = _tokens.Current.Value;
            // end of dictionary
            if (tok.type == TokenType.BracketClose)
                break;

            // Saves may contain some blocks without key.
            // Such blocks are skipped because idk where to put them.
            // Example: `technology_group=tech_cannorian{ }
            // { } { } { }`
            if (tok.type == TokenType.BracketOpen)
            {
                SkipObject();
                continue;
            }

            if (tok.type != TokenType.StringOrNumber)
                throw new UnexpectedTokenException(tok);

            string key = tok.value!;

            // next token should be `=` or `{`
            if (!_tokens.MoveNext())
                throw new UnexpectedTokenException(tok);
            tok = _tokens.Current.Value;
            if (tok.type == TokenType.Equals)
            {
                // skip `=`
                if (!_tokens.MoveNext())
                    throw new UnexpectedTokenException(tok);
            }
            // Saves may contain object definition without `=`.
            // Example: `map_area_data {` instead of `map_area_data = {`
            else if (tok.type != TokenType.BracketOpen)
                throw new UnexpectedTokenException(tok);

            if (!_query.DoesMatch(new SearchArgs(key, _currentDepth, localIndex)))
            {
                SkipValue();
                continue;
            }

            // a closing bracket is not a valid value for a key
            object? value = ParseValue();
            if (value is null)
                throw new UnexpectedTokenException(_tokens.Current.Value);

            if (!dict.TryGetValue(key, out List<object>? list))
            {
                list = new List<object>();
                dict.Add(key, list);
            }
            list.Add(value);
        }

        return dict;
    }

    /// <summary>Parses the save file from the current position as the root dictionary.</summary>
    /// <returns>Values of every key accepted by the search query, grouped by key.</returns>
    public Dictionary<string, List<object>> Parse()
    {
        var root = ParseDict();
        return root;
    }
}