Compare commits

...

2 Commits

Author SHA1 Message Date
52d5320899 TODO.txt 2025-03-22 20:47:08 +05:00
bf00ffe291 BufferedEnumerator that keeps 5 tokens in memory instead of 12,000,000 2025-03-22 20:39:06 +05:00
5 changed files with 167 additions and 54 deletions

View File

@ -0,0 +1,83 @@
using System.Collections;
namespace ParadoxSaveParser.Lib;
/// <summary>
/// Enumerator wrapper that keeps a sliding window of at most <c>N</c> items of the wrapped
/// enumerator in a linked list, where <c>N</c> is the buffer size: up to <c>N/2</c> items before
/// and up to <c>N - N/2 - 1</c> items after the <c>Current</c> item (integer division).
/// </summary>
/// <remarks>
/// <c>Current</c> is a <see cref="LinkedListNode{T}"/>, so buffered neighbours are reachable
/// through <c>Previous</c> / <c>Next</c>. A node that slides out of the window is removed from
/// the list, which clears its links.
/// </remarks>
/// <typeparam name="T">Type of the items produced by the wrapped enumerator.</typeparam>
/// <example>
/// <code language="cs">
/// IEnumerator&lt;int&gt; Enumerator()
/// {
///     for(int i = 0; i &lt; 6; i++)
///         yield return i;
/// }
///
/// var en = Enumerator();
/// var bufen = new BufferedEnumerator&lt;int&gt;(en, 5);
///
/// while(bufen.MoveNext())
/// {
///     var cur = bufen.Current;
///     for (var prev = cur.List?.First; prev != cur; prev = prev?.Next)
///         Console.Write($"{prev?.Value} ");
///
///     Console.Write($"| {cur.Value} |");
///
///     for (var next = cur.Next; next != null; next = next.Next)
///         Console.Write($" {next.Value}");
///     Console.WriteLine();
/// }
/// </code>
/// Output:
/// <code>
/// | 0 | 1 2 3 4
/// 0 | 1 | 2 3 4
/// 0 1 | 2 | 3 4
/// 1 2 | 3 | 4 5
/// 2 3 | 4 | 5
/// 3 4 | 5 |
/// </code>
/// </example>
public class BufferedEnumerator<T> : IEnumerator<LinkedListNode<T>>
{
    private readonly IEnumerator<T> _enumerator;
    private readonly int _bufferSize;
    private readonly LinkedList<T> _llist = new();
    private LinkedListNode<T>? _currentNode;

    // Position of the current node inside the window, clamped to _bufferSize / 2.
    // -1 means MoveNext has not produced an item yet.
    private int _currentNodeIndex = -1;

    // Set once the wrapped enumerator has returned false, so it is never polled again.
    private bool _sourceExhausted;

    // Set once this enumerator has moved past the last item (or was disposed).
    private bool _finished;

    /// <summary>
    /// Creates a buffered view over <paramref name="enumerator"/>.
    /// </summary>
    /// <param name="enumerator">Source of items. It is disposed together with this instance.</param>
    /// <param name="bufferSize">Maximum number of items kept in the window; must be at least 1.</param>
    /// <exception cref="ArgumentNullException"><paramref name="enumerator"/> is null.</exception>
    /// <exception cref="ArgumentOutOfRangeException"><paramref name="bufferSize"/> is less than 1.</exception>
    public BufferedEnumerator(IEnumerator<T> enumerator, int bufferSize)
    {
        ArgumentNullException.ThrowIfNull(enumerator);
        if (bufferSize < 1)
        {
            throw new ArgumentOutOfRangeException(nameof(bufferSize), bufferSize,
                "Buffer size must be at least 1.");
        }

        _enumerator = enumerator;
        _bufferSize = bufferSize;
    }

    /// <summary>
    /// Advances to the next item, dropping the oldest buffered item when the look-behind part
    /// of the window is full and reading ahead from the wrapped enumerator to refill the window.
    /// </summary>
    /// <returns>
    /// <c>true</c> if <see cref="Current"/> now points at an item; <c>false</c> once the items
    /// are exhausted. After the first <c>false</c> every further call returns <c>false</c>.
    /// </returns>
    public bool MoveNext()
    {
        if (_finished)
        {
            return false;
        }

        int maxItemsBefore = _bufferSize / 2;
        if (_currentNodeIndex >= maxItemsBefore)
        {
            // With a buffer size of 1 the oldest node IS the current node. Removing it clears
            // its Next link, so forget it and restart from the head of the refilled list.
            if (ReferenceEquals(_llist.First, _currentNode))
            {
                _currentNode = null;
            }

            _llist.RemoveFirst();
        }

        while (!_sourceExhausted && _llist.Count < _bufferSize)
        {
            if (_enumerator.MoveNext())
            {
                _llist.AddLast(_enumerator.Current);
            }
            else
            {
                _sourceExhausted = true;
            }
        }

        LinkedListNode<T>? nextNode = _currentNode is null ? _llist.First : _currentNode.Next;
        if (nextNode is null)
        {
            // Latch the end state: without it a later call would restart at the head of the
            // leftover window and eventually call RemoveFirst on an empty list.
            _finished = true;
            _currentNode = null;
            return false;
        }

        _currentNode = nextNode;

        // Only "has the window filled up behind the current item" matters, so stop counting
        // at the threshold instead of letting the counter grow for the whole stream.
        if (_currentNodeIndex < maxItemsBefore)
        {
            _currentNodeIndex++;
        }

        return true;
    }

    /// <summary>
    /// Not supported: the wrapped enumerator is consumed forward-only.
    /// </summary>
    /// <exception cref="NotImplementedException">Always thrown.</exception>
    public void Reset()
    {
        throw new NotImplementedException();
    }

    /// <summary>
    /// Node of the current item. Only meaningful after <see cref="MoveNext"/> returned <c>true</c>;
    /// before the first call and after the end it is null despite the non-nullable type.
    /// </summary>
    public LinkedListNode<T> Current => _currentNode!;

    object IEnumerator.Current => Current;

    /// <summary>
    /// Disposes the wrapped enumerator and ends the enumeration.
    /// </summary>
    public void Dispose()
    {
        _finished = true;
        _currentNode = null;
        _enumerator.Dispose();
        GC.SuppressFinalize(this);
    }
}

View File

@ -8,11 +8,11 @@ namespace ParadoxSaveParser.Lib;
public class Parser
{
protected Stream _saveFile;
private List<Token> _tokens = new(4_194_304);
private int _tokenIndex;
private BufferedEnumerator<Token> _tokens;
public Parser(Stream savefile)
{
_tokens = new BufferedEnumerator<Token>(Lex(), 5);
_saveFile = savefile;
}
@ -27,9 +27,9 @@ public class Parser
protected struct Token
{
public TokenType type;
public short column;
public int line;
public required TokenType type;
public required short column;
public required int line;
public string? value;
public override string ToString()
@ -60,10 +60,8 @@ public class Parser
}
}
protected void Lex()
protected IEnumerator<Token> Lex()
{
_tokens.Clear();
string expectedHeader = "EU4txt";
byte[] headBytes = new byte[expectedHeader.Length];
_saveFile.ReadExactly(headBytes);
@ -76,23 +74,31 @@ public class Parser
int column = 0;
bool isQuoteOpen = false;
bool isStrInQuotes = false;
Token strToken = new()
{
type = TokenType.Invalid,
column = -1,
line = -1
};
void CompleteStringToken()
bool TryCompleteStringToken()
{
if (isQuoteOpen)
return;
return false;
// strings in quotes can be empty
if (!isStrInQuotes && (str.Length <= 0 || str[0] == '#'))
return;
_tokens.Add(new Token
return false;
strToken = new Token
{
type = TokenType.StringOrNumber,
column = (short)(column - str.Length),
line = line,
value = str.ToString()
});
};
str.Clear();
isStrInQuotes = false;
return true;
}
while (_saveFile.CanRead)
@ -102,8 +108,9 @@ public class Parser
switch (c)
{
case -1:
CompleteStringToken();
return;
if(TryCompleteStringToken())
yield return strToken;
yield break;
case '\"':
isQuoteOpen = !isQuoteOpen;
isStrInQuotes = true;
@ -111,36 +118,41 @@ public class Parser
case ' ':
case '\t':
case '\r':
CompleteStringToken();
if(TryCompleteStringToken())
yield return strToken;
break;
case '\n':
CompleteStringToken();
if(TryCompleteStringToken())
yield return strToken;
line++;
column = 0;
break;
case '=':
CompleteStringToken();
_tokens.Add(new Token
if(TryCompleteStringToken())
yield return strToken;
yield return new Token
{
type = TokenType.Equals,
line = line, column = (short)column
});
};
break;
case '{':
CompleteStringToken();
_tokens.Add(new Token
if(TryCompleteStringToken())
yield return strToken;
yield return new Token
{
type = TokenType.BracketOpen,
line = line, column = (short)column
});
};
break;
case '}':
CompleteStringToken();
_tokens.Add(new Token
if(TryCompleteStringToken())
yield return strToken;
yield return new Token
{
type = TokenType.BracketClose,
line = line, column = (short)column
});
};
break;
default:
// Skip control characters, which are invisible and causing frontend bugs.
@ -154,15 +166,16 @@ public class Parser
protected class UnexpectedTokenException : Exception
{
public UnexpectedTokenException(Token token, int tokenIndex) :
base($"Unexpected token at index {tokenIndex}: {token}")
public UnexpectedTokenException(Token token) :
base($"Unexpected token: {token}")
{}
}
// doesn't move next
private object? ParseValue()
{
Token tok = _tokens[_tokenIndex++];
Token tok = _tokens.Current.Value;
switch (tok.type)
{
case TokenType.StringOrNumber:
@ -180,25 +193,29 @@ public class Parser
case TokenType.BracketClose:
return null;
default:
throw new UnexpectedTokenException(tok, _tokenIndex - 1);
throw new UnexpectedTokenException(tok);
}
}
// doesn't move next
private object ParseListOrDict()
{
Token first = _tokens[_tokenIndex];
Token second = _tokens[_tokenIndex + 1];
if (first.type == TokenType.StringOrNumber && second.type == TokenType.Equals)
var first = _tokens.Current.Next;
var second = _tokens.Current.Next?.Next;
if (first?.Value.type == TokenType.StringOrNumber && second?.Value.type == TokenType.Equals)
return ParseDict();
return ParseList();
}
// moves next
private List<object> ParseList()
{
List<object> list = new();
while(true)
{
if(!_tokens.MoveNext())
throw new Exception("Unexpected end of file");
object? value = ParseValue();
if (value == null)
break;
@ -208,13 +225,14 @@ public class Parser
}
// moves next
private Dictionary<string, List<object>> ParseDict()
{
Dictionary<string, List<object>> dict = new();
// root is a dict without closing bracket, so this method must check _tokenIndex < _tokens.Count
while (_tokenIndex < _tokens.Count)
while (_tokens.MoveNext())
{
Token tok = _tokens[_tokenIndex++];
Token tok = _tokens.Current.Value;
// end of dictionary
if (tok.type == TokenType.BracketClose)
break;
@ -226,9 +244,9 @@ public class Parser
if (tok.type == TokenType.BracketOpen)
{
int bracketBalance = 1;
while (bracketBalance != 0)
while (bracketBalance != 0 && _tokens.MoveNext())
{
tok = _tokens[_tokenIndex++];
tok = _tokens.Current.Value;
if (tok.type == TokenType.BracketOpen)
bracketBalance++;
else if (tok.type == TokenType.BracketClose)
@ -239,24 +257,29 @@ public class Parser
}
if(tok.type != TokenType.StringOrNumber)
throw new UnexpectedTokenException(tok, _tokenIndex - 1);
throw new UnexpectedTokenException(tok);
string key = tok.value!;
tok = _tokens[_tokenIndex++];
if (tok.type == TokenType.BracketOpen)
// next token should be `=` or `{`
if(!_tokens.MoveNext())
throw new UnexpectedTokenException(tok);
tok = _tokens.Current.Value;
if (tok.type == TokenType.Equals)
{
// Saves may contain key-value definition without `=`.
// Example: `map_area_data{` instead of `map_area_data = {`
_tokenIndex--;
// skip `=`
if (!_tokens.MoveNext())
throw new UnexpectedTokenException(tok);
}
else if(tok.type != TokenType.Equals)
throw new UnexpectedTokenException(tok, _tokenIndex - 1);
// Saves may contain object definition without `=`.
// Example: `map_area_data {` instead of `map_area_data = {`
else if (tok.type != TokenType.BracketOpen)
throw new UnexpectedTokenException(tok);
object? value = ParseValue();
if (value == null)
throw new UnexpectedTokenException(_tokens[_tokenIndex - 1], _tokenIndex - 1);
throw new UnexpectedTokenException(_tokens.Current.Value);
if(!dict.TryGetValue(key, out List<object>? list))
{
list = new List<object>();
@ -270,12 +293,9 @@ public class Parser
public Dictionary<string, List<object>> Parse()
{
Lex();
if (_tokens.Count == 0)
throw new Exception("Save file is empty");
_tokenIndex = 0;
var root = ParseDict();
if (root.Count == 0)
throw new Exception("Save file is empty");
return root;
}
}

View File

@ -156,7 +156,6 @@ public class Program
_app.Logger.Log(LogLevel.Error, "ParseSaveEU4 Error: {errorMesage}", errorMesage);
}
GC.Collect();
await httpContext.Response.WriteAsJsonAsync(meta);
}
}

View File

@ -7,6 +7,7 @@ EndProject
Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "SolutionFolder", "SolutionFolder", "{F1D312F1-0620-4E35-8D78-9A2808CDE12C}"
ProjectSection(SolutionItems) = preProject
.gitignore = .gitignore
TODO.txt = TODO.txt
EndProjectSection
EndProject
Global

10
TODO.txt Normal file
View File

@ -0,0 +1,10 @@
Main:
Move from asp.net to my own http server
Add deletion of temporary files
ParseSaveHandler:
Separate status and error message from metadata
Make this method run as background task instead of POST query
Parser:
Add query support to parse only needed information