rewrite of SearchExpression

This commit is contained in:
Timerix 2025-04-05 03:40:03 +05:00
parent 05c6bdf008
commit 39a01dd05c
4 changed files with 129 additions and 117 deletions

View File

@ -12,17 +12,16 @@ public class SaveParserEU4
{
protected Stream _saveFile;
private BufferedEnumerator<Token> _tokens;
private SearchExpression _query;
private int _currentDepth;
private ISearchExpression? _searchExprCurrent;
/// <summary>
/// Creates a parser that reads tokens from <paramref name="savefile"/> and keeps only
/// the values selected by <paramref name="query"/>.
/// </summary>
/// <param name="savefile">Uncompressed stream of <c>gamestate</c> file which can be extracted from save archive</param>
/// <param name="query">Parsing whole save takes 10 seconds on mid pc and takes 1GB of RAM,
/// so you should specify what exactly you want to get from save file.
/// When <c>null</c>, no key is filtered out.</param>
/// <exception cref="ArgumentNullException"><paramref name="savefile"/> is <c>null</c>.</exception>
public SaveParserEU4(Stream savefile, ISearchExpression? query)
{
    // Assign the stream before the lexer is created so the token source never
    // observes an unset stream, whatever buffering the enumerator does up front.
    _saveFile = savefile ?? throw new ArgumentNullException(nameof(savefile));
    _tokens = new BufferedEnumerator<Token>(LexTextSave(), 5);
    _searchExprCurrent = query;
}
protected enum TokenType : byte
@ -199,9 +198,7 @@ public class SaveParserEU4
return l;
return tok.value;
case TokenType.BracketOpen:
_currentDepth++;
var obj = ParseListOrDict();
_currentDepth--;
return obj;
case TokenType.BracketClose:
return null;
@ -307,16 +304,21 @@ public class SaveParserEU4
// Example: `map_area_data {` instead of `map_area_data = {`
else if (tok.type != TokenType.BracketOpen)
throw new UnexpectedTokenException(tok);
if (!_query.DoesMatch(new SearchArgs(key, _currentDepth, localIndex)))
ISearchExpression? searchExprNext = null;
if (_searchExprCurrent != null
&& !_searchExprCurrent.DoesMatch(new SearchArgs(key, localIndex), out searchExprNext))
{
SkipValue();
continue;
}
var searExpressionPrevious = _searchExprCurrent;
_searchExprCurrent = searchExprNext;
object? value = ParseValue();
if (value is null)
throw new UnexpectedTokenException(_tokens.Current.Value);
_searchExprCurrent = searExpressionPrevious;
if(!dict.TryGetValue(key, out List<object>? list))
{

View File

@ -1,139 +1,141 @@
using System.Diagnostics;
using System.Linq;
namespace ParadoxSaveParser.Lib;
namespace ParadoxSaveParser.Lib;
/// <summary>
/// Describes the entry a search expression is asked to match.
/// </summary>
/// <param name="key">Key of the entry being tested.</param>
/// <param name="localIndex">Index of the entry passed by the parser alongside the key.</param>
public record SearchArgs(string key, int localIndex);
/// <summary>
/// One node of a compiled search query.
/// </summary>
public interface ISearchExpression
{
    /// <summary>
    /// Tests whether the entry described by <paramref name="args"/> is selected by this node.
    /// </summary>
    /// <param name="args">Key and local index of the entry being tested.</param>
    /// <param name="nextSearchExpression">
    /// On a match, the expression to apply to the entries nested inside the matched one,
    /// or <c>null</c> when there is no further expression. Always <c>null</c> when there is no match.
    /// </param>
    /// <returns><c>true</c> when the entry matches; otherwise <c>false</c>.</returns>
    bool DoesMatch(SearchArgs args, out ISearchExpression? nextSearchExpression);
}
/// <summary>
/// Compiles a textual search query into a chain of <see cref="ISearchExpression"/> nodes.
/// </summary>
/// <remarks>
/// Syntax handled by <see cref="Compile"/>:
/// <list type="bullet">
/// <item><description><c>a.b.c</c> - parts separated by an unescaped <c>.</c>; each part gets the rest of the query as its continuation.</description></item>
/// <item><description><c>*</c> - matches any entry.</description></item>
/// <item><description><c>[N]</c> - matches the entry whose local index equals <c>N</c>.</description></item>
/// <item><description><c>(x|y)</c> - alternatives; the group must span the whole query it appears in.</description></item>
/// <item><description>A backslash escapes the following special character.</description></item>
/// </list>
/// </remarks>
public static class SearchExpressionCompiler
{
    /// <summary>
    /// Returns <c>true</c> when <paramref name="chars"/>[<paramref name="i"/>] equals
    /// <paramref name="c"/> and is not escaped.
    /// </summary>
    /// <remarks>
    /// A character is escaped when it is preceded by an odd number of consecutive backslashes:
    /// <c>\.</c> is an escaped dot, while in <c>\\.</c> the backslash itself is escaped and the dot is not.
    /// </remarks>
    private static bool CharEqualsAndNotEscaped(char c, ReadOnlySpan<char> chars, int i)
    {
        if (chars[i] != c)
            return false;

        int precedingBackslashes = 0;
        for (int j = i - 1; j >= 0 && chars[j] == '\\'; j--)
            precedingBackslashes++;

        return precedingBackslashes % 2 == 0;
    }

    /// <summary>
    /// Compiles <paramref name="query"/> into a search expression.
    /// </summary>
    /// <param name="query">Query text, for example <c>countries.(FRA|ENG).name</c>.</param>
    /// <returns>The expression for the first part of the query, linked to the expressions of the following parts.</returns>
    /// <exception cref="ArgumentNullException"><paramref name="query"/> (or one alternative of a group) is empty.</exception>
    /// <exception cref="FormatException">A part is empty, an index part is not of the form <c>[N]</c>, or a group is never closed.</exception>
    /// <exception cref="NotImplementedException">The query uses a pattern other than a lone <c>*</c>, or has text after a closing <c>)</c>.</exception>
    public static ISearchExpression Compile(ReadOnlySpan<char> query)
    {
        if (query.IsEmpty)
            throw new ArgumentNullException(nameof(query));

        if (query[0] is '(')
            return CompileGroup(query);

        // The first part ends at the first unescaped '.'; everything after it is the continuation.
        int partLength = 0;
        while (partLength < query.Length && !CharEqualsAndNotEscaped('.', query, partLength))
            partLength++;

        var part = query.Slice(0, partLength);
        if (part.IsEmpty)
            throw new FormatException("Search expression contains an empty part");

        ReadOnlySpan<char> remaining = default;
        if (partLength < query.Length)
            remaining = query.Slice(partLength + 1);

        if (part is "*")
            return new AnyMatchExpression(CompileNext(remaining));

        for (int j = 0; j < part.Length; j++)
        {
            if (CharEqualsAndNotEscaped('*', part, j))
                throw new NotImplementedException("pattern matching other than '*' is not implemented yet");
        }

        if (part[0] is '[')
        {
            // Require the closing bracket instead of silently dropping whatever the last character is.
            if (part.Length < 3 || part[^1] != ']')
                throw new FormatException("Index part must have the form '[N]'");

            int index = int.Parse(part.Slice(1, part.Length - 2),
                provider: System.Globalization.CultureInfo.InvariantCulture);
            return new IndexMatchExpression(index, CompileNext(remaining));
        }

        // NOTE(review): the key keeps its backslashes verbatim, so a part written with an
        // escaped special character is compared including the backslash - confirm whether
        // the key should be unescaped here.
        return new ExactMatchExpression(part.ToString(), CompileNext(remaining));
    }

    /// <summary>Compiles the continuation of a part, or returns <c>null</c> when nothing follows it.</summary>
    private static ISearchExpression? CompileNext(ReadOnlySpan<char> remaining) =>
        remaining.IsEmpty ? null : Compile(remaining);

    /// <summary>
    /// Compiles a query that starts with <c>(</c> into a <see cref="MultipleMatchExpression"/>
    /// holding one compiled expression per top-level <c>|</c> alternative.
    /// </summary>
    private static ISearchExpression CompileGroup(ReadOnlySpan<char> query)
    {
        var subExprs = new List<ISearchExpression>();
        int alternativeBegin = 1;
        int bracketBalance = 1;
        int closingIndex = -1;

        for (int i = 1; i < query.Length; i++)
        {
            if (CharEqualsAndNotEscaped('(', query, i))
            {
                bracketBalance++;
            }
            else if (CharEqualsAndNotEscaped(')', query, i))
            {
                bracketBalance--;
                if (bracketBalance == 0)
                {
                    // Remember where the group really ends; checking only the last
                    // character would accept inputs such as "(a).(b)".
                    closingIndex = i;
                    break;
                }
            }
            else if (bracketBalance == 1 && CharEqualsAndNotEscaped('|', query, i))
            {
                // Only separators at the group's own nesting level split alternatives.
                subExprs.Add(Compile(query.Slice(alternativeBegin, i - alternativeBegin)));
                alternativeBegin = i + 1;
            }
        }

        if (closingIndex < 0)
            throw new FormatException("Too many opening brackets");
        if (closingIndex != query.Length - 1)
            throw new NotImplementedException("Expressions after ')' are not supported");

        subExprs.Add(Compile(query.Slice(alternativeBegin, closingIndex - alternativeBegin)));
        return new MultipleMatchExpression(subExprs);
    }

    /// <summary>Matches every entry and continues with <paramref name="next"/>.</summary>
    private record AnyMatchExpression(ISearchExpression? next) : ISearchExpression
    {
        public bool DoesMatch(SearchArgs args, out ISearchExpression? nextSearchExpression)
        {
            nextSearchExpression = next;
            return true;
        }
    }

    /// <summary>Matches when at least one of <paramref name="subExprs"/> matches.</summary>
    private record MultipleMatchExpression(List<ISearchExpression> subExprs) : ISearchExpression
    {
        public bool DoesMatch(SearchArgs args, out ISearchExpression? nextSearchExpression)
        {
            nextSearchExpression = null;

            bool matched = false;
            ISearchExpression? firstNext = null;
            List<ISearchExpression>? mergedNext = null;

            foreach (var e in subExprs)
            {
                if (!e.DoesMatch(args, out var next))
                    continue;

                matched = true;
                if (next is null)
                {
                    // This alternative has no continuation, i.e. it does not restrict the
                    // nested entries at all, so the other alternatives cannot add anything.
                    return true;
                }

                // Several alternatives may share this entry (e.g. "(a.b|a.c)" on key "a");
                // keep all their continuations instead of only the first one.
                if (firstNext is null)
                {
                    firstNext = next;
                }
                else
                {
                    mergedNext ??= new List<ISearchExpression> { firstNext };
                    mergedNext.Add(next);
                }
            }

            if (!matched)
                return false;

            nextSearchExpression = mergedNext is null
                ? firstNext
                : new MultipleMatchExpression(mergedNext);
            return true;
        }
    }

    /// <summary>Matches the entry whose local index equals <paramref name="index"/>.</summary>
    private record IndexMatchExpression(int index, ISearchExpression? next) : ISearchExpression
    {
        public bool DoesMatch(SearchArgs args, out ISearchExpression? nextSearchExpression)
        {
            if (args.localIndex == index)
            {
                nextSearchExpression = next;
                return true;
            }

            nextSearchExpression = null;
            return false;
        }
    }

    /// <summary>Matches the entry whose key equals <paramref name="key"/>.</summary>
    private record ExactMatchExpression(string key, ISearchExpression? next) : ISearchExpression
    {
        public bool DoesMatch(SearchArgs args, out ISearchExpression? nextSearchExpression)
        {
            if (args.key == key)
            {
                nextSearchExpression = next;
                return true;
            }

            nextSearchExpression = null;
            return false;
        }
    }
}

View File

@ -101,7 +101,7 @@ public partial class Program
var gamestateStream = File.OpenRead(extractedGamestatePath);
meta.status = SaveFileProcessingStatus.Parsing;
var se = SearchExpression.Parse(searchQuery);
var se = SearchExpressionCompiler.Compile(searchQuery);
var parser = new SaveParserEU4(gamestateStream, se);
var result = parser.Parse();

View File

@ -42,21 +42,29 @@ public partial class Program
/// <summary>
/// Runs one search-expression test case against <paramref name="saveStream"/> and prints the outcome.
/// </summary>
/// <remarks>
/// Prints <c>[OK]</c> when the flattened result equals <c>tc.a</c>, <c>[Invalid]</c> when it differs,
/// and <c>[Error]</c> with the exception text when compiling the query, parsing or serializing throws.
/// </remarks>
/// <param name="saveStream">Save stream; rewound to the beginning before parsing.</param>
/// <param name="tc">Test case providing the query (<c>q</c>) and the expected answer (<c>a</c>).</param>
static void TestSearchExpression(Stream saveStream, TestCase tc)
{
    try
    {
        saveStream.Seek(0, SeekOrigin.Begin);
        var se = SearchExpressionCompiler.Compile(tc.q);
        var parser = new SaveParserEU4(saveStream, se);
        var rootNode = parser.Parse();
        string json = JsonSerializer.Serialize(rootNode, _saveSerializerOptions);
        // Strip the outer braces and JSON punctuation so the result can be compared
        // with the expected answer written in the save file's own notation.
        string pdx = json.Substring(1, json.Length - 2)
            .Replace(",", " ").Replace("{", "{ ").Replace("}", " }")
            .Replace("\"", "").Replace("[", "").Replace("]", "").Replace(":", "=");
        if (pdx == tc.a)
        {
            Console.WriteLine($"[OK] q:'{tc.q}' a:'{tc.a}'");
        }
        else
        {
            Console.WriteLine($"[Invalid] q:'{tc.q}' a:'{tc.a}' r:'{pdx}'");
        }
    }
    catch (Exception ex)
    {
        // A failing case must not abort the remaining cases, so report it and carry on.
        Console.WriteLine($"[Error] q:'{tc.q}' a:'{tc.a}' e:\n" + ex.ToStringDemystified());
    }
}