global using System;
global using System.Collections.Generic;
global using System.IO;
global using System.Text;
using System.Globalization;
using Microsoft.Extensions.ObjectPool;

namespace ParadoxSaveParser.Lib;

/// <summary>
/// Sequential parser that doesn't cache anything.
/// The input stream is lexed lazily, one byte at a time, and only the values
/// accepted by the search expression are materialized.
/// </summary>
public class SaveParserEU4
{
    protected readonly Stream _saveFile;
    private readonly BufferedEnumerator<Token> _tokens;
    private readonly ObjectPool<StringBuilder> _stringBuilderPool;
    private ISearchExpression? _searchExprCurrent;

    /// <param name="savefile">
    /// Uncompressed stream of gamestate file which can be extracted from save archive
    /// </param>
    /// <param name="query">
    /// Parsing whole save takes 10 seconds on mid pc and takes 1GB of RAM,
    /// so you should specify what exactly you want to get from save file.
    /// When null, nothing is filtered out.
    /// </param>
    /// <exception cref="ArgumentNullException"><paramref name="savefile"/> is null.</exception>
    public SaveParserEU4(Stream savefile, ISearchExpression? query)
    {
        ArgumentNullException.ThrowIfNull(savefile);

        _saveFile = savefile;
        _searchExprCurrent = query;

        const int tokenBufSize = 5;

        // The pool is created before the lexer enumerator is handed to the buffer,
        // so the lexer can never observe an uninitialized pool, even if the buffer
        // decides to pre-fetch tokens.
        _stringBuilderPool = new DefaultObjectPool<StringBuilder>(
            new StringBuilderPooledObjectPolicy
            {
                InitialCapacity = tokenBufSize * 13,
                MaximumRetainedCapacity = tokenBufSize * 13,
            });
        _tokens = new BufferedEnumerator<Token>(LexTextSave(), tokenBufSize);
    }

    /// <summary>
    /// Lazily splits the save stream into tokens.
    /// </summary>
    /// <remarks>
    /// Rules:
    /// <list type="bullet">
    /// <item>the stream must start with the <c>EU4txt</c> header;</item>
    /// <item><c>=</c>, <c>{</c> and <c>}</c> are separate tokens, whitespace separates string tokens;</item>
    /// <item>inside double quotes every character belongs to the string, quotes themselves are dropped;</item>
    /// <item><c>#</c> at the start of an unquoted token begins a comment that lasts until the end of the line;</item>
    /// <item>control characters (below 0x20) are never added to a string.</item>
    /// </list>
    /// Every <see cref="TokenType.StringOrNumber"/> token owns a pooled <see cref="StringBuilder"/>
    /// which the consumer is expected to return to the pool.
    /// </remarks>
    /// <exception cref="InvalidDataException">The header is not <c>EU4txt</c>.</exception>
    protected IEnumerator<Token> LexTextSave()
    {
        const string expectedHeader = "EU4txt";
        byte[] headBytes = new byte[expectedHeader.Length];
        _saveFile.ReadExactly(headBytes);
        string headStr = Encoding.UTF8.GetString(headBytes);
        if (headStr != expectedHeader)
        {
            throw new InvalidDataException(
                $"Invalid gamestate header. Expected '{expectedHeader}', got '{headStr}'.");
        }

        StringBuilder strb = _stringBuilderPool.Get();
        // NOTE(review): the header sits on line 1 and its trailing newline is consumed
        // by the loop below, so starting at 2 looks like it reports lines one too high.
        // Kept as is - confirm before changing.
        int line = 2;
        int column = 0;
        bool isQuoteOpen = false;
        bool isStrInQuotes = false;
        bool isInComment = false;
        Token strToken = new()
        {
            type = TokenType.Invalid,
            column = -1,
            line = -1,
            value = null,
        };

        // Turns the accumulated characters into strToken and takes a fresh builder.
        // Returns false when there is nothing to emit yet.
        bool TryCompleteStringToken()
        {
            if (isQuoteOpen)
                return false;
            // strings in quotes may be empty
            if (!isStrInQuotes && strb.Length == 0)
                return false;

            strToken = new Token
            {
                type = TokenType.StringOrNumber,
                column = ToColumn(column - strb.Length),
                line = line,
                value = strb,
            };
            strb = _stringBuilderPool.Get();
            isStrInQuotes = false;
            return true;
        }

        while (_saveFile.CanRead)
        {
            int c = _saveFile.ReadByte();
            column++;

            // end of stream
            if (c == -1)
            {
                if (TryCompleteStringToken())
                    yield return strToken;
                _stringBuilderPool.Return(strb);
                yield break;
            }

            // Everything up to the end of the line is ignored inside a comment.
            if (isInComment)
            {
                if (c == '\n')
                {
                    isInComment = false;
                    line++;
                    column = 0;
                }
                continue;
            }

            // Inside quotes separators are ordinary characters of the string.
            if (isQuoteOpen && c != '"')
            {
                if (c == '\n')
                {
                    line++;
                    column = 0;
                }
                // Skip control characters, which are invisible and causing frontend bugs.
                else if (c >= 0x20)
                {
                    strb.Append((char)c);
                }
                continue;
            }

            switch (c)
            {
                case '"':
                    isQuoteOpen = !isQuoteOpen;
                    isStrInQuotes = true;
                    break;

                // Only a '#' that starts a token begins a comment;
                // '#' in the middle of a token stays a part of it.
                case '#' when strb.Length == 0 && !isStrInQuotes:
                    isInComment = true;
                    break;

                case ' ':
                case '\t':
                case '\r':
                    if (TryCompleteStringToken())
                        yield return strToken;
                    break;

                case '\n':
                    if (TryCompleteStringToken())
                        yield return strToken;
                    line++;
                    column = 0;
                    break;

                case '=':
                    if (TryCompleteStringToken())
                        yield return strToken;
                    yield return new Token
                    {
                        type = TokenType.Equals,
                        line = line,
                        column = ToColumn(column),
                    };
                    break;

                case '{':
                    if (TryCompleteStringToken())
                        yield return strToken;
                    yield return new Token
                    {
                        type = TokenType.BracketOpen,
                        line = line,
                        column = ToColumn(column),
                    };
                    break;

                case '}':
                    if (TryCompleteStringToken())
                        yield return strToken;
                    yield return new Token
                    {
                        type = TokenType.BracketClose,
                        line = line,
                        column = ToColumn(column),
                    };
                    break;

                default:
                    // Skip control characters, which are invisible and causing frontend bugs.
                    // I dont know why there are so many of them in strings.
                    if (c >= 0x20)
                        strb.Append((char)c);
                    break;
            }
        }

        _stringBuilderPool.Return(strb);
    }

    // Token.column is a short; clamp instead of silently wrapping on very long lines.
    private static short ToColumn(int column) =>
        (short)Math.Clamp(column, 0, short.MaxValue);

    /// <summary>
    /// Converts the text of a string token into a string, a double or a long.
    /// </summary>
    /// <remarks>
    /// Numbers are parsed with the invariant culture: the save format always uses '.'
    /// as the decimal separator, whatever the culture of the current thread is.
    /// </remarks>
    private static object ParseScalar(string text)
    {
        // string values can be empty
        if (text.Length == 0)
            return string.Empty;

        char first = text[0];
        if (first != '-' && !char.IsDigit(first))
            return text;

        if (text.Contains('.')
            && double.TryParse(
                text,
                NumberStyles.Float | NumberStyles.AllowThousands,
                CultureInfo.InvariantCulture,
                out double d))
        {
            return d;
        }

        if (long.TryParse(text, NumberStyles.Integer, CultureInfo.InvariantCulture, out long l))
            return l;

        // things like dates (1444.11.11) stay strings
        return text;
    }

    /// <summary>
    /// Parses the value at the current token. Doesn't move next.
    /// </summary>
    /// <returns>
    /// string, double, long, list or dictionary;
    /// null if the current token is a closing bracket.
    /// </returns>
    /// <exception cref="UnexpectedTokenException">The current token can't start a value.</exception>
    private object? ParseValue()
    {
        var tok = _tokens.Current.Value;
        switch (tok.type)
        {
            case TokenType.StringOrNumber:
                StringBuilder sb = tok.value!;
                string tokStr = sb.ToString();
                // The builder goes back to the pool for every string, including empty ones.
                _stringBuilderPool.Return(sb);
                return ParseScalar(tokStr);

            case TokenType.BracketOpen:
                return ParseListOrDict();

            case TokenType.BracketClose:
                return null;

            default:
                throw new UnexpectedTokenException(tok);
        }
    }

    /// <summary>
    /// Skips the value at the current token without materializing it.
    /// </summary>
    /// <returns>true if skipped value, false if current token is closing bracket</returns>
    /// <exception cref="UnexpectedTokenException">The current token can't start a value.</exception>
    private bool SkipValue()
    {
        var tok = _tokens.Current.Value;
        switch (tok.type)
        {
            case TokenType.BracketOpen:
                SkipObject();
                return true;

            case TokenType.StringOrNumber:
                _stringBuilderPool.Return(tok.value!);
                return true;

            case TokenType.BracketClose:
                return false;

            default:
                throw new UnexpectedTokenException(tok);
        }
    }

    /// <summary>
    /// Skips all tokens inside curly braces block.
    /// Stops after the matching closing bracket or when tokens run out.
    /// </summary>
    /// <param name="bracketBalance">Number of brackets which are already open.</param>
    private void SkipObject(int bracketBalance = 1)
    {
        while (bracketBalance != 0 && _tokens.MoveNext())
        {
            var tok = _tokens.Current.Value;
            switch (tok.type)
            {
                case TokenType.BracketOpen:
                    bracketBalance++;
                    break;
                case TokenType.BracketClose:
                    bracketBalance--;
                    break;
                case TokenType.StringOrNumber:
                    _stringBuilderPool.Return(tok.value!);
                    break;
            }
        }
    }

    private static bool IsEmptyCollection(object value) =>
        value is Dictionary<string, object> { Count: 0 } or List<object> { Count: 0 };

    /// <summary>
    /// Decides by two tokens of lookahead whether the block which starts at the current
    /// opening bracket is a dictionary (<c>key = ...</c>) or a list, and parses it.
    /// Doesn't move next before the decision.
    /// </summary>
    private object ParseListOrDict()
    {
        var first = _tokens.Current.Next;
        var second = _tokens.Current.Next?.Next;
        if (first?.Value.type == TokenType.StringOrNumber
            && second?.Value.type == TokenType.Equals)
        {
            return ParseDict();
        }

        return ParseList();
    }

    /// <summary>
    /// Parses values until the closing bracket. Moves next.
    /// Elements rejected by the search expression and empty collections are not added.
    /// </summary>
    /// <exception cref="EndOfStreamException">Tokens ran out before the closing bracket.</exception>
    private List<object> ParseList()
    {
        List<object> list = new();
        for (int i = 0; ; i++)
        {
            if (!_tokens.MoveNext())
                throw new EndOfStreamException("Unexpected end of file");

            ISearchExpression? searchExprNext = null;
            if (_searchExprCurrent != null
                && !_searchExprCurrent.DoesMatch(new SearchArgs(i, string.Empty), out searchExprNext))
            {
                if (!SkipValue())
                    break;
                continue;
            }

            // The nested expression is active only while the element is being parsed.
            var searchExprPrev = _searchExprCurrent;
            _searchExprCurrent = searchExprNext;
            object? value = ParseValue();
            _searchExprCurrent = searchExprPrev;

            // closing bracket
            if (value is null)
                break;

            // do not add empty collections into list
            if (IsEmptyCollection(value))
                continue;

            list.Add(value);
        }

        return list;
    }

    /// <summary>
    /// Parses <c>key = value</c> pairs until the closing bracket or the end of tokens.
    /// Moves next. Repeated keys are merged into a list.
    /// </summary>
    /// <exception cref="UnexpectedTokenException">The token sequence is not a valid dictionary.</exception>
    private Dictionary<string, object> ParseDict()
    {
        Dictionary<string, object> dict = new();

        // Root is a dict without closing bracket,
        // so the loop also ends normally when the tokens run out.
        for (int localIndex = 0; _tokens.MoveNext(); localIndex++)
        {
            var tok = _tokens.Current.Value;

            // end of dictionary
            if (tok.type == TokenType.BracketClose)
                break;

            // Saves may contain some blocks without key.
            // Such blocks are skipped because idk where to put them.
            // Example: `technology_group=tech_cannorian{ }
            //           { } { } { }`
            if (tok.type == TokenType.BracketOpen)
            {
                SkipObject();
                continue;
            }

            if (tok.type != TokenType.StringOrNumber)
                throw new UnexpectedTokenException(tok);

            var keySB = tok.value!;

            // next token should be `=` or `{`
            if (!_tokens.MoveNext())
                throw new UnexpectedTokenException(tok);
            tok = _tokens.Current.Value;

            if (tok.type == TokenType.Equals)
            {
                // skip `=`
                if (!_tokens.MoveNext())
                    throw new UnexpectedTokenException(tok);
            }
            // Saves may contain object definition without `=`.
            // Example: `map_area_data {` instead of `map_area_data = {`
            else if (tok.type != TokenType.BracketOpen)
            {
                throw new UnexpectedTokenException(tok);
            }

            ISearchExpression? searchExprNext = null;
            if (_searchExprCurrent != null
                && !_searchExprCurrent.DoesMatch(new SearchArgs(localIndex, keySB), out searchExprNext))
            {
                if (!SkipValue())
                    throw new UnexpectedTokenException(_tokens.Current.Value);
                _stringBuilderPool.Return(keySB);
                continue;
            }

            // The nested expression is active only while the value is being parsed.
            var searchExprPrev = _searchExprCurrent;
            _searchExprCurrent = searchExprNext;
            object? value = ParseValue();
            if (value is null)
                throw new UnexpectedTokenException(_tokens.Current.Value);
            _searchExprCurrent = searchExprPrev;

            string keyStr = keySB.ToString();
            _stringBuilderPool.Return(keySB);

            // Paradox save format has another way of defining list:
            //   a = 1
            //   a = 2
            // It means `a = { 1 2 }`
            if (dict.TryGetValue(keyStr, out var firstValue))
            {
                // Do not add empty collections into list.
                // `key:{}` is okay, but i don't want to see `key:[{},{},{},{},{},{}]`
                if (IsEmptyCollection(value))
                    continue;

                if (firstValue is List<object> existingList)
                    existingList.Add(value);
                else
                    dict[keyStr] = new List<object> { firstValue, value };
            }
            else
            {
                dict.Add(keyStr, value);
            }
        }

        return dict;
    }

    /// <summary>
    /// Parses the save from the current position of the token stream.
    /// </summary>
    /// <returns>Root dictionary of the save.</returns>
    public Dictionary<string, object> Parse()
    {
        return ParseDict();
    }

    protected enum TokenType : byte
    {
        Invalid,
        StringOrNumber,
        Equals,
        BracketOpen,
        BracketClose
    }

    protected struct Token
    {
        public required TokenType type;
        public required short column;
        public required int line;
        // Pooled builder with the token text; set by the lexer for StringOrNumber tokens.
        public StringBuilder? value;

        /// <summary>Formats the token as <c>line:column 'text'</c> for error messages.</summary>
        public override string ToString()
        {
            string s = type switch
            {
                TokenType.Invalid => "INVALID_TOKEN",
                TokenType.StringOrNumber =>
                    (value is null || value.Length == 0) ? "NULL" : value.ToString(),
                TokenType.Equals => "=",
                TokenType.BracketOpen => "{",
                TokenType.BracketClose => "}",
                _ => throw new ArgumentOutOfRangeException(type.ToString()),
            };
            return $"{line}:{column} '{s}'";
        }
    }

    protected class UnexpectedTokenException : Exception
    {
        public UnexpectedTokenException(Token token)
            : base($"Unexpected token: {token}")
        {
        }
    }
}