global using System;
global using System.Collections.Generic;
global using System.IO;
global using System.Text;
using System.Globalization;

namespace ParadoxSaveParser.Lib;

/// <summary>
/// Sequential parser that doesn't cache anything.
/// Tokens are lexed lazily from the stream and consumed by a recursive-descent parser.
/// </summary>
public class SaveParserEU4
{
    protected Stream _saveFile;
    private readonly BufferedEnumerator<Token> _tokens;
    private ISearchExpression? _searchExprCurrent;

    /// <summary>
    /// Creates a parser over an uncompressed text gamestate stream.
    /// </summary>
    /// <param name="savefile">
    /// Uncompressed stream of gamestate file which can be extracted from save archive
    /// </param>
    /// <param name="query">
    /// Parsing whole save takes 10 seconds on mid pc and takes 1GB of RAM,
    /// so you should specify what exactly you want to get from save file.
    /// Pass null to parse everything.
    /// </param>
    /// <exception cref="ArgumentNullException"><paramref name="savefile"/> is null.</exception>
    public SaveParserEU4(Stream savefile, ISearchExpression? query)
    {
        ArgumentNullException.ThrowIfNull(savefile);

        // The stream must be assigned before the token enumerator is created:
        // the lexer reads _saveFile, and it is not visible here whether
        // BufferedEnumerator pre-fills its buffer in the constructor.
        _saveFile = savefile;
        _searchExprCurrent = query;
        _tokens = new BufferedEnumerator<Token>(LexTextSave(), 5);
    }

    protected enum TokenType : byte
    {
        Invalid,
        StringOrNumber,
        Equals,
        BracketOpen,
        BracketClose,
    }

    /// <summary>
    /// A single lexical token together with its position in the save file.
    /// </summary>
    protected struct Token
    {
        public required TokenType type;
        public required short column;
        public required int line;
        public string? value;

        /// <summary>Formats the token as <c>line:column 'text'</c> for error messages.</summary>
        public override string ToString()
        {
            string s = type switch
            {
                TokenType.Invalid => "INVALID_TOKEN",
                TokenType.StringOrNumber => value ?? "NULL",
                TokenType.Equals => "=",
                TokenType.BracketOpen => "{",
                TokenType.BracketClose => "}",
                _ => throw new ArgumentOutOfRangeException(type.ToString()),
            };
            return $"{line}:{column} '{s}'";
        }
    }

    /// <summary>
    /// Thrown when the parser meets a token that is not valid at the current position.
    /// </summary>
    protected class UnexpectedTokenException : Exception
    {
        public UnexpectedTokenException(Token token)
            : base($"Unexpected token: {token}")
        {
        }
    }

    /// <summary>
    /// Lazily splits the save stream into tokens.
    /// Verifies the <c>EU4txt</c> header first, then yields string/number tokens,
    /// <c>=</c>, <c>{</c> and <c>}</c> until the stream ends.
    /// </summary>
    /// <exception cref="InvalidDataException">The stream does not start with the expected header.</exception>
    protected IEnumerator<Token> LexTextSave()
    {
        const string expectedHeader = "EU4txt";
        byte[] headBytes = new byte[expectedHeader.Length];
        _saveFile.ReadExactly(headBytes);
        string headStr = Encoding.UTF8.GetString(headBytes);
        if (headStr != expectedHeader)
            throw new InvalidDataException($"Invalid gamestate header. Expected '{expectedHeader}', got '{headStr}'.");

        StringBuilder str = new();
        // Only the header text has been consumed, not the line break after it,
        // so the position is still on line 1 right behind the header.
        int line = 1;
        int column = expectedHeader.Length;
        bool isQuoteOpen = false;
        bool isStrInQuotes = false;
        bool isInComment = false;
        Token strToken = new() { type = TokenType.Invalid, column = -1, line = -1 };

        // Token.column is a short; clamp instead of letting very long lines wrap to negative values.
        static short ClampColumn(int value) => (short)Math.Clamp(value, short.MinValue, short.MaxValue);

        // Turns the accumulated characters into strToken.
        // Returns false when there is nothing to emit.
        bool TryCompleteStringToken()
        {
            // only reachable at end of stream: an unterminated quoted string is dropped
            if (isQuoteOpen)
                return false;
            // strings in quotes may be empty
            if (!isStrInQuotes && str.Length == 0)
                return false;

            strToken = new Token
            {
                type = TokenType.StringOrNumber,
                column = ClampColumn(column - str.Length),
                line = line,
                value = str.ToString()
            };
            str.Clear();
            isStrInQuotes = false;
            return true;
        }

        while (_saveFile.CanRead)
        {
            int c = _saveFile.ReadByte();
            column++;

            // end of stream
            if (c == -1)
            {
                if (TryCompleteStringToken())
                    yield return strToken;
                yield break;
            }

            // a comment runs until the end of the line and produces no tokens
            if (isInComment)
            {
                if (c == '\n')
                {
                    isInComment = false;
                    line++;
                    column = 0;
                }
                continue;
            }

            // Inside quotes everything except the closing quote is text:
            // spaces must be kept and `=`, `{`, `}`, `#` have no special meaning.
            if (isQuoteOpen && c != '"')
            {
                if (c == '\n')
                {
                    line++;
                    column = 0;
                }
                // Control characters are invisible and cause frontend bugs, so they are skipped.
                else if (c >= 0x20)
                {
                    str.Append((char)c);
                }
                continue;
            }

            switch (c)
            {
                case '"':
                    isQuoteOpen = !isQuoteOpen;
                    isStrInQuotes = true;
                    break;
                // `#` at the start of an unquoted token begins a comment
                case '#' when str.Length == 0 && !isStrInQuotes:
                    isInComment = true;
                    break;
                case ' ':
                case '\t':
                case '\r':
                    if (TryCompleteStringToken())
                        yield return strToken;
                    break;
                case '\n':
                    if (TryCompleteStringToken())
                        yield return strToken;
                    line++;
                    column = 0;
                    break;
                case '=':
                    if (TryCompleteStringToken())
                        yield return strToken;
                    yield return new Token { type = TokenType.Equals, line = line, column = ClampColumn(column) };
                    break;
                case '{':
                    if (TryCompleteStringToken())
                        yield return strToken;
                    yield return new Token { type = TokenType.BracketOpen, line = line, column = ClampColumn(column) };
                    break;
                case '}':
                    if (TryCompleteStringToken())
                        yield return strToken;
                    yield return new Token { type = TokenType.BracketClose, line = line, column = ClampColumn(column) };
                    break;
                default:
                    // Control characters are invisible and cause frontend bugs, so they are skipped.
                    if (c >= 0x20)
                        str.Append((char)c);
                    break;
            }
        }
    }

    /// <summary>
    /// Converts the current token into a value. Does not advance for scalar tokens;
    /// for an opening bracket the whole block is consumed by <see cref="ParseListOrDict"/>.
    /// </summary>
    /// <returns>
    /// A string, double or long for scalar tokens, a list or dictionary for a block,
    /// or null when the current token is a closing bracket.
    /// </returns>
    private object? ParseValue()
    {
        Token tok = _tokens.Current.Value;
        switch (tok.type)
        {
            case TokenType.StringOrNumber:
                if (string.IsNullOrEmpty(tok.value))
                    return string.Empty;
                // anything that doesn't look like a number stays a string
                if (tok.value[0] != '-' && !char.IsDigit(tok.value[0]))
                    return tok.value;
                // Save files always use '.' as decimal separator, so parsing must not
                // depend on the culture of the machine running the parser.
                if (tok.value.Contains('.')
                    && double.TryParse(tok.value, NumberStyles.Float | NumberStyles.AllowThousands,
                        CultureInfo.InvariantCulture, out double d))
                    return d;
                if (long.TryParse(tok.value, NumberStyles.Integer, CultureInfo.InvariantCulture, out long l))
                    return l;
                // e.g. dates like 1444.11.11
                return tok.value;
            case TokenType.BracketOpen:
                return ParseListOrDict();
            case TokenType.BracketClose:
                return null;
            default:
                throw new UnexpectedTokenException(tok);
        }
    }

    /// <summary>
    /// Skips the value at the current token. A block is skipped up to its closing bracket,
    /// a scalar token needs no extra work because the caller advances past it.
    /// </summary>
    /// <returns>true if skipped value, false if current token is closing bracket</returns>
    private bool SkipValue()
    {
        Token tok = _tokens.Current.Value;
        if (tok.type == TokenType.BracketOpen)
        {
            SkipObject();
            return true;
        }

        return tok.type != TokenType.BracketClose;
    }

    /// <summary>
    /// Skips all tokens inside a curly braces block.
    /// Stops on the bracket that brings the balance to zero or at the end of the token stream.
    /// </summary>
    /// <param name="bracketBalance">Number of currently open brackets.</param>
    private void SkipObject(int bracketBalance = 1)
    {
        while (bracketBalance != 0 && _tokens.MoveNext())
        {
            Token tok = _tokens.Current.Value;
            if (tok.type == TokenType.BracketOpen)
                bracketBalance++;
            else if (tok.type == TokenType.BracketClose)
                bracketBalance--;
        }
    }

    /// <summary>
    /// Decides whether the block starting at the current opening bracket is a dictionary or a list
    /// by looking ahead two tokens (without consuming them), then parses it.
    /// </summary>
    private object ParseListOrDict()
    {
        var first = _tokens.Current.Next;
        var second = first?.Next;
        // `key =` right after the bracket means the block holds key-value pairs
        if (first?.Value.type == TokenType.StringOrNumber && second?.Value.type == TokenType.Equals)
            return ParseDict();
        return ParseList();
    }

    /// <summary>
    /// Parses values until the closing bracket of the list. Advances the token stream.
    /// </summary>
    /// <exception cref="EndOfStreamException">The token stream ended before the closing bracket.</exception>
    private List<object> ParseList()
    {
        List<object> list = new();
        while (true)
        {
            if (!_tokens.MoveNext())
                throw new EndOfStreamException("Unexpected end of file");

            // null means the closing bracket was reached
            object? value = ParseValue();
            if (value is null)
                break;
            list.Add(value);
        }

        return list;
    }

    /// <summary>
    /// Parses key-value pairs until the closing bracket or the end of the token stream.
    /// Values of repeated keys are collected in the same list.
    /// Keys rejected by the current search expression are skipped without being parsed.
    /// </summary>
    private Dictionary<string, List<object>> ParseDict()
    {
        Dictionary<string, List<object>> dict = new();
        // root is a dict without closing bracket, so the loop also ends when the token stream is exhausted
        for (int localIndex = 0; _tokens.MoveNext(); localIndex++)
        {
            Token tok = _tokens.Current.Value;
            // end of dictionary
            if (tok.type == TokenType.BracketClose)
                break;

            // Saves may contain some blocks without key.
            // Such blocks are skipped because it is unclear where to put them.
            // Example: `technology_group=tech_cannorian{ }
            //           { } { } { }`
            if (tok.type == TokenType.BracketOpen)
            {
                SkipObject();
                continue;
            }

            if (tok.type != TokenType.StringOrNumber)
                throw new UnexpectedTokenException(tok);
            string key = tok.value!;

            // next token should be `=` or `{`
            if (!_tokens.MoveNext())
                throw new UnexpectedTokenException(tok);
            tok = _tokens.Current.Value;
            if (tok.type == TokenType.Equals)
            {
                // skip `=`
                if (!_tokens.MoveNext())
                    throw new UnexpectedTokenException(tok);
            }
            // Saves may contain object definition without `=`.
            // Example: `map_area_data {` instead of `map_area_data = {`
            else if (tok.type != TokenType.BracketOpen)
            {
                throw new UnexpectedTokenException(tok);
            }

            ISearchExpression? searchExprNext = null;
            if (_searchExprCurrent != null
                && !_searchExprCurrent.DoesMatch(new SearchArgs(key, localIndex), out searchExprNext))
            {
                // `key = }` has no value; continuing would swallow the bracket that closes this dictionary
                if (!SkipValue())
                    throw new UnexpectedTokenException(_tokens.Current.Value);
                continue;
            }

            // nested values are filtered by the sub-expression returned from DoesMatch
            var searchExprPrevious = _searchExprCurrent;
            _searchExprCurrent = searchExprNext;
            object? value = ParseValue();
            _searchExprCurrent = searchExprPrevious;
            if (value is null)
                throw new UnexpectedTokenException(_tokens.Current.Value);

            if (!dict.TryGetValue(key, out List<object>? list))
            {
                list = new List<object>();
                dict.Add(key, list);
            }

            list.Add(value);
        }

        return dict;
    }

    /// <summary>
    /// Parses the save file from the current stream position.
    /// </summary>
    /// <returns>Root dictionary mapping every key to the list of values found for it.</returns>
    public Dictionary<string, List<object>> Parse()
    {
        return ParseDict();
    }
}