rewrite of SearchExpression

This commit is contained in:
Timerix 2025-04-05 03:40:03 +05:00
parent 05c6bdf008
commit 39a01dd05c
4 changed files with 129 additions and 117 deletions

View File

@ -12,17 +12,16 @@ public class SaveParserEU4
{ {
protected Stream _saveFile; protected Stream _saveFile;
private BufferedEnumerator<Token> _tokens; private BufferedEnumerator<Token> _tokens;
private SearchExpression _query; private ISearchExpression? _searchExprCurrent;
private int _currentDepth;
/// <param name="savefile">Uncompressed stream of <c>gamestate</c> file which can be extracted from save archive</param> /// <param name="savefile">Uncompressed stream of <c>gamestate</c> file which can be extracted from save archive</param>
/// <param name="query">Parsing whole save takes 10 seconds on mid pc and takes 1GB of RAM, /// <param name="query">Parsing whole save takes 10 seconds on mid pc and takes 1GB of RAM,
/// so you should specify what exactly you want to get from save file</param> /// so you should specify what exactly you want to get from save file</param>
public SaveParserEU4(Stream savefile, SearchExpression query) public SaveParserEU4(Stream savefile, ISearchExpression? query)
{ {
_tokens = new BufferedEnumerator<Token>(LexTextSave(), 5); _tokens = new BufferedEnumerator<Token>(LexTextSave(), 5);
_saveFile = savefile; _saveFile = savefile;
_query = query; _searchExprCurrent = query;
} }
protected enum TokenType : byte protected enum TokenType : byte
@ -199,9 +198,7 @@ public class SaveParserEU4
return l; return l;
return tok.value; return tok.value;
case TokenType.BracketOpen: case TokenType.BracketOpen:
_currentDepth++;
var obj = ParseListOrDict(); var obj = ParseListOrDict();
_currentDepth--;
return obj; return obj;
case TokenType.BracketClose: case TokenType.BracketClose:
return null; return null;
@ -307,16 +304,21 @@ public class SaveParserEU4
// Example: `map_area_data {` instead of `map_area_data = {` // Example: `map_area_data {` instead of `map_area_data = {`
else if (tok.type != TokenType.BracketOpen) else if (tok.type != TokenType.BracketOpen)
throw new UnexpectedTokenException(tok); throw new UnexpectedTokenException(tok);
if (!_query.DoesMatch(new SearchArgs(key, _currentDepth, localIndex))) ISearchExpression? searchExprNext = null;
if (_searchExprCurrent != null
&& !_searchExprCurrent.DoesMatch(new SearchArgs(key, localIndex), out searchExprNext))
{ {
SkipValue(); SkipValue();
continue; continue;
} }
var searExpressionPrevious = _searchExprCurrent;
_searchExprCurrent = searchExprNext;
object? value = ParseValue(); object? value = ParseValue();
if (value is null) if (value is null)
throw new UnexpectedTokenException(_tokens.Current.Value); throw new UnexpectedTokenException(_tokens.Current.Value);
_searchExprCurrent = searExpressionPrevious;
if(!dict.TryGetValue(key, out List<object>? list)) if(!dict.TryGetValue(key, out List<object>? list))
{ {

View File

@ -1,139 +1,141 @@
using System.Diagnostics; namespace ParadoxSaveParser.Lib;
using System.Linq;
namespace ParadoxSaveParser.Lib; public record SearchArgs(string key, int localIndex);
public record SearchArgs(string key, int currentDepth, int localIndex);
public interface ISearchExpression public interface ISearchExpression
{ {
bool DoesMatch(SearchArgs args); bool DoesMatch(SearchArgs args, out ISearchExpression? nextSearchExpression);
} }
public class SearchExpression : ISearchExpression public static class SearchExpressionCompiler
{ {
private List<ISearchExpression> _compiledExpression;
private int _expressionDepth;
private SearchExpression(List<ISearchExpression> compiledExpression, int expressionDepth)
{
_compiledExpression = compiledExpression;
_expressionDepth = expressionDepth;
}
public bool DoesMatch(SearchArgs args)
{
int index = args.currentDepth - _expressionDepth;
if (index < 0 || index >= _compiledExpression.Count)
return true;
return _compiledExpression[index].DoesMatch(args);
}
private static bool CharEqualsAndNotEscaped(char c, ReadOnlySpan<char> chars, int i) => private static bool CharEqualsAndNotEscaped(char c, ReadOnlySpan<char> chars, int i) =>
chars[i] == c && (i < 1 || chars[i - 1] != '\\') && (i < 2 || chars[i - 2] != '\\'); chars[i] == c && (i < 1 || chars[i - 1] != '\\') && (i < 2 || chars[i - 2] != '\\');
public static SearchExpression Parse(string query) => ParseInternal(query, 0);
private static SearchExpression ParseInternal(ReadOnlySpan<char> query, int expressionDepth) public static ISearchExpression Compile(ReadOnlySpan<char> query)
{ {
var compiledExpression = new List<ISearchExpression>(); if(query.IsEmpty)
ISearchExpression exprPart; throw new ArgumentNullException(nameof(query));
int partBegin = 0;
int bracketBalance = 0;
int expressionDepthIncrement = 0;
for (int i = 0; i < query.Length; i++) if(query[0] is '(')
{
if (CharEqualsAndNotEscaped('(', query, i))
bracketBalance++;
else if (CharEqualsAndNotEscaped(')', query, i))
bracketBalance--;
else if (bracketBalance == 0 && CharEqualsAndNotEscaped('.', query, i))
{
var part = query.Slice(partBegin, i - partBegin);
expressionDepthIncrement++;
exprPart = ParsePart(part, query, partBegin,
expressionDepth + expressionDepthIncrement);
compiledExpression.Add(exprPart);
partBegin = i + 1;
}
}
exprPart = ParsePart(query.Slice(partBegin), query, partBegin,
expressionDepth + expressionDepthIncrement);
compiledExpression.Add(exprPart);
return new SearchExpression(compiledExpression, expressionDepth);
}
private static ISearchExpression ParsePart(ReadOnlySpan<char> part,
ReadOnlySpan<char> query, int partBegin, int expressionDepth)
{
if (part is "*")
{
return new AnyMatchExpression();
}
if (CharEqualsAndNotEscaped('[', query, partBegin))
{
part = part.Slice(1, part.Length - 2);
return new IndexMatchExpression(int.Parse(part));
}
if(part[0] is '(')
{ {
var subExprs = new List<ISearchExpression>(); var subExprs = new List<ISearchExpression>();
ISearchExpression subExpr; int supExprBegin = 1;
part = part.Slice(1, part.Length - 2); int bracketBalance = 1;
int supExprBegin = 0; for (int i = supExprBegin; i < query.Length && bracketBalance != 0; i++)
for (int j = 0; j < part.Length; j++)
{ {
if (CharEqualsAndNotEscaped('|', part, j)) if (CharEqualsAndNotEscaped('(', query, i))
bracketBalance++;
else if (CharEqualsAndNotEscaped(')', query, i))
bracketBalance--;
else if (bracketBalance == 1 && CharEqualsAndNotEscaped('|', query, i))
{ {
subExpr = ParseInternal(part.Slice(supExprBegin, j - supExprBegin), var subPart = query.Slice(supExprBegin, i - supExprBegin);
expressionDepth); var subExpr = Compile(subPart);
subExprs.Add(subExpr); subExprs.Add(subExpr);
supExprBegin = j + 1; supExprBegin = i + 1;
} }
} }
subExpr = ParseInternal(part.Slice(supExprBegin), expressionDepth); if(query[^1] != ')')
subExprs.Add(subExpr); throw new NotImplementedException("Expressions after ')' are not supported");
if (bracketBalance > 0)
throw new Exception("Too many opening brackets");
if (bracketBalance < 0)
throw new Exception("Too many closing brackets");
var subPartLast = query.Slice(supExprBegin, query.Length - supExprBegin - 1);
var subExprLast = Compile(subPartLast);
subExprs.Add(subExprLast);
return new MultipleMatchExpression(subExprs); return new MultipleMatchExpression(subExprs);
} }
int partBeforePointLength = 0;
while (partBeforePointLength < query.Length)
{
if(CharEqualsAndNotEscaped('.', query, partBeforePointLength))
break;
partBeforePointLength++;
}
var part = query.Slice(0, partBeforePointLength);
ReadOnlySpan<char> remaining = default;
if (partBeforePointLength < query.Length)
remaining = query.Slice(partBeforePointLength + 1);
if (part is "*")
{
return new AnyMatchExpression(remaining.IsEmpty ? null : Compile(remaining));
}
for (int j = 0; j < part.Length; j++)
{
if(CharEqualsAndNotEscaped('*', part, j))
throw new NotImplementedException("pattern matching other than '*' is not implemented yet");
}
return new ExactMatchExpression(part.ToString()); if (part[0] is '[')
{
part = part.Slice(1, part.Length - 2);
return new IndexMatchExpression(int.Parse(part), remaining.IsEmpty ? null : Compile(remaining));
}
return new ExactMatchExpression(part.ToString(), remaining.IsEmpty ? null : Compile(remaining));
} }
private record AnyMatchExpression : ISearchExpression private record AnyMatchExpression(ISearchExpression? next) : ISearchExpression
{ {
public bool DoesMatch(SearchArgs args) => true; public bool DoesMatch(SearchArgs args, out ISearchExpression? nextSearchExpression)
{
nextSearchExpression = next;
return true;
}
} }
private record MultipleMatchExpression(List<ISearchExpression> subExprs) : ISearchExpression private record MultipleMatchExpression(List<ISearchExpression> subExprs) : ISearchExpression
{ {
public bool DoesMatch(SearchArgs args) public bool DoesMatch(SearchArgs args, out ISearchExpression? nextSearchExpression)
{ {
foreach (var e in subExprs) foreach (var e in subExprs)
{ {
if(e.DoesMatch(args)) if(e.DoesMatch(args, out nextSearchExpression))
{
return true; return true;
}
} }
nextSearchExpression = null;
return false; return false;
} }
} }
private record IndexMatchExpression(int index) : ISearchExpression private record IndexMatchExpression(int index, ISearchExpression? next) : ISearchExpression
{ {
public bool DoesMatch(SearchArgs args) => args.localIndex == index; public bool DoesMatch(SearchArgs args, out ISearchExpression? nextSearchExpression)
{
if (args.localIndex == index)
{
nextSearchExpression = next;
return true;
}
nextSearchExpression = null;
return false;
}
} }
private record ExactMatchExpression(string key) : ISearchExpression private record ExactMatchExpression(string key, ISearchExpression? next) : ISearchExpression
{ {
public bool DoesMatch(SearchArgs args) => args.key == key; public bool DoesMatch(SearchArgs args, out ISearchExpression? nextSearchExpression)
{
if (args.key == key)
{
nextSearchExpression = next;
return true;
}
nextSearchExpression = null;
return false;
}
} }
} }

View File

@ -101,7 +101,7 @@ public partial class Program
var gamestateStream = File.OpenRead(extractedGamestatePath); var gamestateStream = File.OpenRead(extractedGamestatePath);
meta.status = SaveFileProcessingStatus.Parsing; meta.status = SaveFileProcessingStatus.Parsing;
var se = SearchExpression.Parse(searchQuery); var se = SearchExpressionCompiler.Compile(searchQuery);
var parser = new SaveParserEU4(gamestateStream, se); var parser = new SaveParserEU4(gamestateStream, se);
var result = parser.Parse(); var result = parser.Parse();

View File

@ -42,21 +42,29 @@ public partial class Program
static void TestSearchExpression(Stream saveStream, TestCase tc) static void TestSearchExpression(Stream saveStream, TestCase tc)
{ {
saveStream.Seek(0, SeekOrigin.Begin); try
var se = SearchExpression.Parse(tc.q);
var parser = new SaveParserEU4(saveStream, se);
var rootNode = parser.Parse();
string json = JsonSerializer.Serialize(rootNode, _saveSerializerOptions);
string pdx = json.Substring(1, json.Length - 2)
.Replace(",", " ").Replace("{", "{ ").Replace("}", " }")
.Replace("\"", "").Replace("[", "").Replace("]", "").Replace(":", "=");
if(pdx == tc.a)
{ {
Console.WriteLine($"[OK] q:'{tc.q}' a:'{tc.a}'");
saveStream.Seek(0, SeekOrigin.Begin);
var se = SearchExpressionCompiler.Compile(tc.q);
var parser = new SaveParserEU4(saveStream, se);
var rootNode = parser.Parse();
string json = JsonSerializer.Serialize(rootNode, _saveSerializerOptions);
string pdx = json.Substring(1, json.Length - 2)
.Replace(",", " ").Replace("{", "{ ").Replace("}", " }")
.Replace("\"", "").Replace("[", "").Replace("]", "").Replace(":", "=");
if (pdx == tc.a)
{
Console.WriteLine($"[OK] q:'{tc.q}' a:'{tc.a}'");
}
else
{
Console.WriteLine($"[Invalid] q:'{tc.q}' a:'{tc.a}' r:'{pdx}'");
}
} }
else catch (Exception ex)
{ {
Console.WriteLine($"[Error] q:'{tc.q}' a:'{tc.a}' r:'{pdx}'"); Console.WriteLine($"[Error] q:'{tc.q}' a:'{tc.a}' e:\n" + ex.ToStringDemystified());
} }
} }