From bf00ffe291cf35d7662a7ba7cfe027c87cf1fce1 Mon Sep 17 00:00:00 2001 From: Timerix Date: Sat, 22 Mar 2025 20:39:06 +0500 Subject: [PATCH] BufferedEnumerator that keeps 5 tokens in memory instead of 12,000,000 --- ParadoxSaveParser.Lib/BufferedEnumerator.cs | 83 +++++++++++++ ParadoxSaveParser.Lib/Parser.cs | 126 ++++++++++++-------- ParadoxSaveParser.WebAPI/Program.cs | 1 - 3 files changed, 156 insertions(+), 54 deletions(-) create mode 100644 ParadoxSaveParser.Lib/BufferedEnumerator.cs diff --git a/ParadoxSaveParser.Lib/BufferedEnumerator.cs b/ParadoxSaveParser.Lib/BufferedEnumerator.cs new file mode 100644 index 0000000..329d450 --- /dev/null +++ b/ParadoxSaveParser.Lib/BufferedEnumerator.cs @@ -0,0 +1,83 @@ +using System.Collections; + +namespace ParadoxSaveParser.Lib; + +/// +/// Enumerator wrapper that stores N/2 items before and N/2-1 after Current item. +/// +/// +/// IEnumerator<int> Enumerator() +/// { +/// for(int i = 0; i < 6; i++) +/// yield return i; +/// } +/// +/// var en = Enumerator(); +/// var bufen = new BufferedEnumerator<int>(en, 5); +/// +/// while(bufen.MoveNext()) +/// { +/// var cur = bufen.Current; +/// for (var prev = cur.List?.First; prev != cur; prev = prev?.Next) +/// Console.Write($"{prev?.Value} "); +/// +/// Console.Write($"| {cur.Value} |"); +/// +/// for (var next = cur.Next; next != null; next = next.Next) +/// Console.Write($" {next.Value}"); +/// Console.WriteLine(); +/// } +/// +/// Output: +/// +/// | 0 | 1 2 3 4 +/// 0 | 1 | 2 3 4 +/// 0 1 | 2 | 3 4 +/// 1 2 | 3 | 4 5 +/// 2 3 | 4 | 5 +/// 3 4 | 5 | +/// +public class BufferedEnumerator : IEnumerator> +{ + private IEnumerator _enumerator; + private int _bufferSize; + LinkedList _llist = new(); + private LinkedListNode? _currentNode; + private int _currentNodeIndex = -1; + + public BufferedEnumerator(IEnumerator enumerator, int bufferSize) + { + _enumerator = enumerator; + _bufferSize = bufferSize; + } + + public bool MoveNext() + { + if(_currentNodeIndex >= _bufferSize / 2) + _llist.RemoveFirst(); + + while (_llist.Count < _bufferSize && _enumerator.MoveNext()) + { + _llist.AddLast(_enumerator.Current); + } + if (_llist.Count == 0) + return false; + + _currentNodeIndex++; + _currentNode = _currentNode == null ? _llist.First : _currentNode.Next; + return _currentNode != null; + } + + public void Reset() + { + throw new NotImplementedException(); + } + + public LinkedListNode Current => _currentNode!; + + object IEnumerator.Current => Current; + + public void Dispose() + { + } +} \ No newline at end of file diff --git a/ParadoxSaveParser.Lib/Parser.cs b/ParadoxSaveParser.Lib/Parser.cs index aee95b8..1395c19 100644 --- a/ParadoxSaveParser.Lib/Parser.cs +++ b/ParadoxSaveParser.Lib/Parser.cs @@ -8,11 +8,11 @@ namespace ParadoxSaveParser.Lib; public class Parser { protected Stream _saveFile; - private List _tokens = new(4_194_304); - private int _tokenIndex; + private BufferedEnumerator _tokens; public Parser(Stream savefile) { + _tokens = new BufferedEnumerator(Lex(), 5); _saveFile = savefile; } @@ -27,9 +27,9 @@ public class Parser protected struct Token { - public TokenType type; - public short column; - public int line; + public required TokenType type; + public required short column; + public required int line; public string? value; public override string ToString() @@ -60,10 +60,8 @@ public class Parser } } - protected void Lex() + protected IEnumerator Lex() { - _tokens.Clear(); - string expectedHeader = "EU4txt"; byte[] headBytes = new byte[expectedHeader.Length]; _saveFile.ReadExactly(headBytes); @@ -76,23 +74,31 @@ public class Parser int column = 0; bool isQuoteOpen = false; bool isStrInQuotes = false; - - void CompleteStringToken() + Token strToken = new() + { + type = TokenType.Invalid, + column = -1, + line = -1 + }; + + bool TryCompleteStringToken() { if (isQuoteOpen) - return; + return false; // strings in quotes can be empty if (!isStrInQuotes && (str.Length <= 0 || str[0] == '#')) - return; - _tokens.Add(new Token + return false; + + strToken = new Token { type = TokenType.StringOrNumber, column = (short)(column - str.Length), line = line, value = str.ToString() - }); + }; str.Clear(); isStrInQuotes = false; + return true; } while (_saveFile.CanRead) @@ -102,8 +108,9 @@ public class Parser switch (c) { case -1: - CompleteStringToken(); - return; + if(TryCompleteStringToken()) + yield return strToken; + yield break; case '\"': isQuoteOpen = !isQuoteOpen; isStrInQuotes = true; @@ -111,36 +118,41 @@ public class Parser case ' ': case '\t': case '\r': - CompleteStringToken(); + if(TryCompleteStringToken()) + yield return strToken; break; case '\n': - CompleteStringToken(); + if(TryCompleteStringToken()) + yield return strToken; line++; column = 0; break; case '=': - CompleteStringToken(); - _tokens.Add(new Token + if(TryCompleteStringToken()) + yield return strToken; + yield return new Token { type = TokenType.Equals, line = line, column = (short)column - }); + }; break; case '{': - CompleteStringToken(); - _tokens.Add(new Token + if(TryCompleteStringToken()) + yield return strToken; + yield return new Token { type = TokenType.BracketOpen, line = line, column = (short)column - }); + }; break; case '}': - CompleteStringToken(); - _tokens.Add(new Token + if(TryCompleteStringToken()) + yield return strToken; + yield return new Token { type = TokenType.BracketClose, line = line, column = (short)column - }); + }; break; default: // Skip control characters, which are invisible and causing frontend bugs. @@ -154,15 +166,16 @@ public class Parser protected class UnexpectedTokenException : Exception { - public UnexpectedTokenException(Token token, int tokenIndex) : - base($"Unexpected token at index {tokenIndex}: {token}") + public UnexpectedTokenException(Token token) : + base($"Unexpected token: {token}") {} } + // doesn't move next private object? ParseValue() { - Token tok = _tokens[_tokenIndex++]; + Token tok = _tokens.Current.Value; switch (tok.type) { case TokenType.StringOrNumber: @@ -180,25 +193,29 @@ public class Parser case TokenType.BracketClose: return null; default: - throw new UnexpectedTokenException(tok, _tokenIndex - 1); + throw new UnexpectedTokenException(tok); } } + // doesn't move next private object ParseListOrDict() { - Token first = _tokens[_tokenIndex]; - Token second = _tokens[_tokenIndex + 1]; - if (first.type == TokenType.StringOrNumber && second.type == TokenType.Equals) + var first = _tokens.Current.Next; + var second = _tokens.Current.Next?.Next; + if (first?.Value.type == TokenType.StringOrNumber && second?.Value.type == TokenType.Equals) return ParseDict(); return ParseList(); } + // moves next private List ParseList() { List list = new(); while(true) { + if(!_tokens.MoveNext()) + throw new Exception("Unexpected end of file"); object? value = ParseValue(); if (value == null) break; @@ -208,13 +225,14 @@ public class Parser } + // moves next private Dictionary> ParseDict() { Dictionary> dict = new(); // root is a dict without closing bracket, so this method must check _tokenIndex < _tokens.Count - while (_tokenIndex < _tokens.Count) + while (_tokens.MoveNext()) { - Token tok = _tokens[_tokenIndex++]; + Token tok = _tokens.Current.Value; // end of dictionary if (tok.type == TokenType.BracketClose) break; @@ -226,9 +244,9 @@ public class Parser if (tok.type == TokenType.BracketOpen) { int bracketBalance = 1; - while (bracketBalance != 0) + while (bracketBalance != 0 && _tokens.MoveNext()) { - tok = _tokens[_tokenIndex++]; + tok = _tokens.Current.Value; if (tok.type == TokenType.BracketOpen) bracketBalance++; else if (tok.type == TokenType.BracketClose) @@ -239,24 +257,29 @@ public class Parser } if(tok.type != TokenType.StringOrNumber) - throw new UnexpectedTokenException(tok, _tokenIndex - 1); + throw new UnexpectedTokenException(tok); string key = tok.value!; - tok = _tokens[_tokenIndex++]; - if (tok.type == TokenType.BracketOpen) + // next token should be `=` or `{` + if(!_tokens.MoveNext()) + throw new UnexpectedTokenException(tok); + tok = _tokens.Current.Value; + if (tok.type == TokenType.Equals) { - // Saves may contain key-value definition without `=`. - // Example: `map_area_data{` instead of `map_area_data = {` - _tokenIndex--; + // skip `=` + if (!_tokens.MoveNext()) + throw new UnexpectedTokenException(tok); } - else if(tok.type != TokenType.Equals) - throw new UnexpectedTokenException(tok, _tokenIndex - 1); - + // Saves may contain object definition without `=`. + // Example: `map_area_data {` instead of `map_area_data = {` + else if (tok.type != TokenType.BracketOpen) + throw new UnexpectedTokenException(tok); object? value = ParseValue(); if (value == null) - throw new UnexpectedTokenException(_tokens[_tokenIndex - 1], _tokenIndex - 1); + throw new UnexpectedTokenException(_tokens.Current.Value); + if(!dict.TryGetValue(key, out List? list)) { list = new List(); @@ -270,12 +293,9 @@ public class Parser public Dictionary> Parse() { - Lex(); - if (_tokens.Count == 0) - throw new Exception("Save file is empty"); - - _tokenIndex = 0; var root = ParseDict(); + if (root.Count == 0) + throw new Exception("Save file is empty"); return root; } } \ No newline at end of file diff --git a/ParadoxSaveParser.WebAPI/Program.cs b/ParadoxSaveParser.WebAPI/Program.cs index 42b3669..9dce370 100644 --- a/ParadoxSaveParser.WebAPI/Program.cs +++ b/ParadoxSaveParser.WebAPI/Program.cs @@ -156,7 +156,6 @@ public class Program _app.Logger.Log(LogLevel.Error, "ParseSaveEU4 Error: {errorMesage}", errorMesage); } - GC.Collect(); await httpContext.Response.WriteAsJsonAsync(meta); } } \ No newline at end of file