global using System;
global using System.Collections.Generic;
global using System.IO;
global using System.Text;
using System.Globalization;
using Microsoft.Extensions.ObjectPool;

namespace ParadoxSaveParser.Lib;

/// <summary>
/// Sequential parser that doesn't cache anything.
/// </summary>
public class SaveParserEU4
{
    protected readonly Stream _saveFile;
    private readonly BufferedEnumerator<Token> _tokens;
    private readonly ObjectPool<StringBuilder> _stringBuilderPool;
    private ISearchExpression? _searchExprCurrent;

    // Diagnostic counters: how many builders were taken from / given back to the pool.
    public int SBPoolGetCount = 0;
    public int SBPoolReturnCount = 0;

    /// <summary>
    /// Creates a parser over an uncompressed gamestate stream.
    /// </summary>
    /// <param name="savefile">
    /// Uncompressed stream of gamestate file which can be extracted from save archive
    /// </param>
    /// <param name="query">
    /// Parsing whole save takes 10 seconds on mid pc and takes 1GB of RAM,
    /// so you should specify what exactly you want to get from save file
    /// </param>
    /// <exception cref="ArgumentNullException"><paramref name="savefile"/> is null.</exception>
    public SaveParserEU4(Stream savefile, ISearchExpression? query)
    {
        ArgumentNullException.ThrowIfNull(savefile);

        _saveFile = savefile;
        _searchExprCurrent = query;

        const int tokenBufSize = 5;

        // The pool is created BEFORE the token enumerator: the lexer takes builders
        // from the pool, so it must exist even if the enumerator pre-fetches tokens.
        // NOTE(review): whether BufferedEnumerator pre-fetches is not visible here.
        _stringBuilderPool = new DefaultObjectPool<StringBuilder>(
            new StringBuilderPooledObjectPolicy
            {
                InitialCapacity = tokenBufSize * 13,
                MaximumRetainedCapacity = tokenBufSize * 13,
            });
        _tokens = new BufferedEnumerator<Token>(LexTextSave(), tokenBufSize);
    }

    /// <summary>
    /// Lazily splits the save stream into tokens.
    /// The header is validated when the first token is requested.
    /// </summary>
    /// <returns>
    /// Token sequence. The <see cref="StringBuilder"/> inside every
    /// <see cref="TokenType.StringOrNumber"/> token comes from the pool and
    /// must be returned to it by the consumer.
    /// </returns>
    /// <exception cref="Exception">The stream does not start with the text-save header.</exception>
    protected IEnumerator<Token> LexTextSave()
    {
        const string expectedHeader = "EU4txt";
        byte[] headBytes = new byte[expectedHeader.Length];
        _saveFile.ReadExactly(headBytes);
        string headStr = Encoding.UTF8.GetString(headBytes);
        if (headStr != expectedHeader)
        {
            throw new Exception($"Invalid gamestate header. Expected '{expectedHeader}', got '{headStr}'.");
        }

        StringBuilder strb = _stringBuilderPool.Get();
        SBPoolGetCount++;

        int line = 2;
        int column = 0;
        bool isQuoteOpen = false;   // currently between an opening and a closing quote
        bool isStrInQuotes = false; // the pending token contained a quoted part
        bool isComment = false;     // currently inside a '#' comment
        Token strToken = new()
        {
            type = TokenType.Invalid,
            column = -1,
            line = -1,
            value = null,
        };

        // Moves the pending text into strToken and starts a fresh builder.
        // Returns false when there is nothing to emit.
        bool TryCompleteStringToken()
        {
            if (isQuoteOpen)
            {
                return false;
            }

            // strings in quotes may be empty
            if (!isStrInQuotes && strb.Length <= 0)
            {
                return false;
            }

            strToken = new Token
            {
                type = TokenType.StringOrNumber,
                column = (short)(column - strb.Length),
                line = line,
                value = strb,
            };
            strb = _stringBuilderPool.Get();
            SBPoolGetCount++;
            isStrInQuotes = false;
            return true;
        }

        while (_saveFile.CanRead)
        {
            int c = _saveFile.ReadByte();
            column++;

            // end of stream
            if (c == -1)
            {
                if (TryCompleteStringToken())
                {
                    yield return strToken;
                }

                _stringBuilderPool.Return(strb);
                SBPoolReturnCount++;
                yield break;
            }

            // A newline always advances the position and ends a comment.
            // Inside quotes it does not complete the token (and is not stored).
            if (c == '\n')
            {
                isComment = false;
                if (TryCompleteStringToken())
                {
                    yield return strToken;
                }

                line++;
                column = 0;
                continue;
            }

            if (isComment)
            {
                continue;
            }

            if (c == '\"')
            {
                isQuoteOpen = !isQuoteOpen;
                isStrInQuotes = true;
                continue;
            }

            // Inside quotes everything is literal text: spaces are kept and
            // '=', '{', '}', '#' are not treated as syntax.
            if (isQuoteOpen)
            {
                // Skip control characters, which are invisible and causing frontend bugs.
                if (c >= 0x20)
                {
                    strb.Append((char)c);
                }

                continue;
            }

            switch (c)
            {
                case ' ':
                case '\t':
                case '\r':
                    if (TryCompleteStringToken())
                    {
                        yield return strToken;
                    }

                    break;
                case '=':
                    if (TryCompleteStringToken())
                    {
                        yield return strToken;
                    }

                    yield return new Token { type = TokenType.Equals, line = line, column = (short)column };
                    break;
                case '{':
                    if (TryCompleteStringToken())
                    {
                        yield return strToken;
                    }

                    yield return new Token { type = TokenType.BracketOpen, line = line, column = (short)column };
                    break;
                case '}':
                    if (TryCompleteStringToken())
                    {
                        yield return strToken;
                    }

                    yield return new Token { type = TokenType.BracketClose, line = line, column = (short)column };
                    break;
                default:
                    // '#' at the start of an unquoted token begins a comment.
                    // NOTE(review): assumes comments run to the end of the line — confirm.
                    if (c == '#' && !isStrInQuotes && strb.Length == 0)
                    {
                        isComment = true;
                        break;
                    }

                    // Skip control characters, which are invisible and causing frontend bugs.
                    // I dont know why there are so many of them in strings.
                    if (c >= 0x20)
                    {
                        strb.Append((char)c);
                    }

                    break;
            }
        }

        _stringBuilderPool.Return(strb);
        SBPoolReturnCount++;
    }

    /// <summary>
    /// Converts the current token (and, for a block, everything inside it) to a value.
    /// Doesn't move next before reading the current token.
    /// </summary>
    /// <returns>
    /// <see cref="string"/>, <see cref="long"/> or <see cref="double"/> for a scalar,
    /// a list or dictionary for a block, or null if the current token is a closing bracket.
    /// </returns>
    private object? ParseValue()
    {
        var tok = _tokens.Current.Value;
        switch (tok.type)
        {
            case TokenType.StringOrNumber:
                string tokStr = tok.value!.ToString();
                _stringBuilderPool.Return(tok.value);
                SBPoolReturnCount++;

                // Empty quoted string ("") - there is no first character to inspect.
                if (tokStr.Length == 0)
                {
                    return tokStr;
                }

                char firstChar = tokStr[0];
                if (firstChar != '-' && !char.IsDigit(firstChar))
                {
                    return tokStr;
                }

                // Invariant culture: the result must not depend on the machine's
                // regional settings. Styles are the defaults of the culture-less overloads.
                if (tokStr.Contains('.')
                    && double.TryParse(tokStr, NumberStyles.Float | NumberStyles.AllowThousands,
                        CultureInfo.InvariantCulture, out double d))
                {
                    return d;
                }

                if (long.TryParse(tokStr, NumberStyles.Integer, CultureInfo.InvariantCulture, out long l))
                {
                    return l;
                }

                // Looks numeric but isn't (e.g. several dots) - keep the text.
                return tokStr;
            case TokenType.BracketOpen:
                return ParseListOrDict();
            case TokenType.BracketClose:
                return null;
            default:
                throw new UnexpectedTokenException(tok);
        }
    }

    /// <summary>
    /// Skips the value at the current token, returning pooled builders it passes over.
    /// </summary>
    /// <returns>true if skipped value, false if current token is closing bracket</returns>
    private bool SkipValue()
    {
        var tok = _tokens.Current.Value;
        switch (tok.type)
        {
            case TokenType.BracketOpen:
                SkipObject();
                return true;
            case TokenType.StringOrNumber:
                _stringBuilderPool.Return(tok.value!);
                SBPoolReturnCount++;
                return true;
            case TokenType.Equals:
                return true;
            case TokenType.BracketClose:
                return false;
            default:
                throw new UnexpectedTokenException(tok);
        }
    }

    /// <summary>
    /// Skips all tokens inside curly braces block, including nested blocks.
    /// Stops early if the token stream ends.
    /// </summary>
    /// <param name="bracketBalance">Number of currently unclosed opening brackets.</param>
    private void SkipObject(int bracketBalance = 1)
    {
        while (bracketBalance != 0 && _tokens.MoveNext())
        {
            var tok = _tokens.Current.Value;
            if (tok.type == TokenType.BracketOpen)
            {
                bracketBalance++;
            }
            else if (tok.type == TokenType.BracketClose)
            {
                bracketBalance--;
            }
            else if (tok.type == TokenType.StringOrNumber)
            {
                _stringBuilderPool.Return(tok.value!);
                SBPoolReturnCount++;
            }
        }
    }

    /// <summary>
    /// Decides whether the block opened by the current token is a dictionary or a list
    /// by peeking at the next two buffered tokens. Doesn't move next itself.
    /// </summary>
    private object ParseListOrDict()
    {
        var first = _tokens.Current.Next;
        var second = _tokens.Current.Next?.Next;

        // `key =` right after the opening bracket means a dictionary.
        if (first?.Value.type == TokenType.StringOrNumber && second?.Value.type == TokenType.Equals)
        {
            return ParseDict();
        }

        return ParseList();
    }

    /// <summary>
    /// Reads values until the closing bracket of the current block. Moves next.
    /// </summary>
    /// <exception cref="Exception">The token stream ended before the closing bracket.</exception>
    private List<object> ParseList()
    {
        List<object> list = new();
        while (true)
        {
            if (!_tokens.MoveNext())
            {
                throw new Exception("Unexpected end of file");
            }

            // null means the closing bracket was reached
            object? value = ParseValue();
            if (value is null)
            {
                break;
            }

            list.Add(value);
        }

        return list;
    }

    /// <summary>
    /// Reads `key = value` pairs until the closing bracket or the end of the token stream.
    /// Moves next. Values of repeated keys are collected in one list per key.
    /// Pairs rejected by the current search expression are skipped.
    /// </summary>
    private Dictionary<string, List<object>> ParseDict()
    {
        Dictionary<string, List<object>> dict = new();

        // root is a dict without closing bracket, so the loop also has to end
        // when the token stream is exhausted
        for (int localIndex = 0; _tokens.MoveNext(); localIndex++)
        {
            var tok = _tokens.Current.Value;

            // end of dictionary
            if (tok.type == TokenType.BracketClose)
            {
                break;
            }

            // Saves may contain some blocks without key.
            // Such blocks are skipped because idk where to put them.
            // Example: `technology_group=tech_cannorian{ }
            // { } { } { }`
            if (tok.type == TokenType.BracketOpen)
            {
                SkipObject();
                continue;
            }

            if (tok.type != TokenType.StringOrNumber)
            {
                throw new UnexpectedTokenException(tok);
            }

            var keySB = tok.value!;

            // next token should be `=` or `{`
            if (!_tokens.MoveNext())
            {
                throw new UnexpectedTokenException(tok);
            }

            tok = _tokens.Current.Value;
            if (tok.type == TokenType.Equals)
            {
                // skip `=`
                if (!_tokens.MoveNext())
                {
                    throw new UnexpectedTokenException(tok);
                }
            }
            // Saves may contain object definition without `=`.
            // Example: `map_area_data {` instead of `map_area_data = {`
            else if (tok.type != TokenType.BracketOpen)
            {
                throw new UnexpectedTokenException(tok);
            }

            ISearchExpression? searchExprNext = null;
            if (_searchExprCurrent != null
                && !_searchExprCurrent.DoesMatch(new SearchArgs(localIndex, keySB), out searchExprNext))
            {
                SkipValue();
                _stringBuilderPool.Return(keySB);
                SBPoolReturnCount++;
                continue;
            }

            // The nested value is parsed with the sub-expression produced by the match,
            // then the expression of this level is restored.
            var searExpressionPrevious = _searchExprCurrent;
            _searchExprCurrent = searchExprNext;
            object? value = ParseValue();
            if (value is null)
            {
                throw new UnexpectedTokenException(_tokens.Current.Value);
            }

            _searchExprCurrent = searExpressionPrevious;

            string keyStr = keySB.ToString();
            _stringBuilderPool.Return(keySB);
            SBPoolReturnCount++;

            if (!dict.TryGetValue(keyStr, out var list))
            {
                list = new List<object>();
                dict.Add(keyStr, list);
            }

            list.Add(value);
        }

        return dict;
    }

    /// <summary>
    /// Parses the save from the current stream position as a root dictionary.
    /// </summary>
    /// <returns>Root dictionary: key to the list of all values found under that key.</returns>
    public Dictionary<string, List<object>> Parse()
    {
        var root = ParseDict();
        return root;
    }

    protected enum TokenType : byte
    {
        Invalid,
        StringOrNumber,
        Equals,
        BracketOpen,
        BracketClose
    }

    protected struct Token
    {
        public required TokenType type;
        public required short column;
        public required int line;

        // Text of a StringOrNumber token; the lexer leaves it unset for structural tokens.
        public StringBuilder? value;

        /// <summary>Formats the token as <c>line:column 'text'</c> for error messages.</summary>
        public override string ToString()
        {
            string s = type switch
            {
                TokenType.Invalid => "INVALID_TOKEN",
                TokenType.StringOrNumber => (value == null || value.Length == 0) ? "NULL" : value.ToString(),
                TokenType.Equals => "=",
                TokenType.BracketOpen => "{",
                TokenType.BracketClose => "}",
                _ => throw new ArgumentOutOfRangeException(type.ToString()),
            };
            return $"{line}:{column} '{s}'";
        }
    }

    protected class UnexpectedTokenException : Exception
    {
        public UnexpectedTokenException(Token token) : base($"Unexpected token: {token}")
        {
        }
    }
}