From 7c0345b1603a4a15e8ce40c14fbe55ff72623151 Mon Sep 17 00:00:00 2001 From: Timerix Date: Mon, 24 Mar 2025 02:46:03 +0500 Subject: [PATCH] implemented SearchExpression (buggy) --- .../{Parser.cs => SaveParserEU4.cs} | 92 +++++++--- ParadoxSaveParser.Lib/SearchExpression.cs | 141 +++++++++++++++ ParadoxSaveParser.WebAPI/Program.Responses.cs | 125 +++++++++++++ ParadoxSaveParser.WebAPI/Program.cs | 168 ++++++------------ 4 files changed, 388 insertions(+), 138 deletions(-) rename ParadoxSaveParser.Lib/{Parser.cs => SaveParserEU4.cs} (78%) create mode 100644 ParadoxSaveParser.Lib/SearchExpression.cs create mode 100644 ParadoxSaveParser.WebAPI/Program.Responses.cs diff --git a/ParadoxSaveParser.Lib/Parser.cs b/ParadoxSaveParser.Lib/SaveParserEU4.cs similarity index 78% rename from ParadoxSaveParser.Lib/Parser.cs rename to ParadoxSaveParser.Lib/SaveParserEU4.cs index 0d1150b..7c5c27f 100644 --- a/ParadoxSaveParser.Lib/Parser.cs +++ b/ParadoxSaveParser.Lib/SaveParserEU4.cs @@ -5,15 +5,24 @@ global using System.Text; namespace ParadoxSaveParser.Lib; -public class Parser +/// +/// Sequential parser that doesn't cache anything. +/// +public class SaveParserEU4 { protected Stream _saveFile; private BufferedEnumerator _tokens; - - public Parser(Stream savefile) + private SearchExpression _query; + private int _currentDepth; + + /// Uncompressed stream of gamestate file which can be extracted from save archive + /// Parsing whole save takes 10 seconds on mid pc and takes 1GB of RAM, + /// so you should specify what exactly you want to get from save file + public SaveParserEU4(Stream savefile, SearchExpression query) { - _tokens = new BufferedEnumerator(Lex(), 5); + _tokens = new BufferedEnumerator(LexTextSave(), 5); _saveFile = savefile; + _query = query; } protected enum TokenType : byte @@ -60,14 +69,21 @@ public class Parser } } - protected IEnumerator Lex() + protected class UnexpectedTokenException : Exception + { + public UnexpectedTokenException(Token token) : + base($"Unexpected token: {token}") + {} + } + + protected IEnumerator LexTextSave() { string expectedHeader = "EU4txt"; byte[] headBytes = new byte[expectedHeader.Length]; _saveFile.ReadExactly(headBytes); string headStr = Encoding.UTF8.GetString(headBytes); if (headStr != expectedHeader) - throw new Exception($"Invalid gamestate header: '{headStr}'"); + throw new Exception($"Invalid gamestate header. Expected '{expectedHeader}', got '{headStr}'."); StringBuilder str = new(); int line = 2; @@ -85,7 +101,8 @@ public class Parser { if (isQuoteOpen) return false; - // strings in quotes can be empty + + // strings in quotes may be empty if (!isStrInQuotes && (str.Length <= 0 || str[0] == '#')) return false; @@ -163,13 +180,6 @@ public class Parser } } } - - protected class UnexpectedTokenException : Exception - { - public UnexpectedTokenException(Token token) : - base($"Unexpected token: {token}") - {} - } // doesn't move next @@ -189,13 +199,44 @@ public class Parser return l; return tok.value; case TokenType.BracketOpen: - return ParseListOrDict(); + _currentDepth++; + var obj = ParseListOrDict(); + _currentDepth--; + return obj; case TokenType.BracketClose: return null; default: throw new UnexpectedTokenException(tok); } } + + + // skips next value + /// true if skipped value, false if current token is closing bracket + private bool SkipValue() + { + Token tok = _tokens.Current.Value; + if (tok.type == TokenType.BracketOpen) + { + SkipObject(); + return true; + } + + return tok.type != TokenType.BracketClose; + } + + // skips all tokens inside curly braces block + private void SkipObject(int bracketBalance = 1) + { + while (bracketBalance != 0 && _tokens.MoveNext()) + { + Token tok = _tokens.Current.Value; + if (tok.type == TokenType.BracketOpen) + bracketBalance++; + else if (tok.type == TokenType.BracketClose) + bracketBalance--; + } + } // doesn't move next private object ParseListOrDict() @@ -223,14 +264,14 @@ public class Parser } return list; } - // moves next private Dictionary> ParseDict() { Dictionary> dict = new(); + // root is a dict without closing bracket, so this method must check _tokenIndex < _tokens.Count - while (_tokens.MoveNext()) + for (int localIndex = 0; _tokens.MoveNext(); localIndex++) { Token tok = _tokens.Current.Value; // end of dictionary @@ -243,16 +284,7 @@ public class Parser // { } { } { }` if (tok.type == TokenType.BracketOpen) { - int bracketBalance = 1; - while (bracketBalance != 0 && _tokens.MoveNext()) - { - tok = _tokens.Current.Value; - if (tok.type == TokenType.BracketOpen) - bracketBalance++; - else if (tok.type == TokenType.BracketClose) - bracketBalance--; - } - + SkipObject(); continue; } @@ -276,6 +308,12 @@ public class Parser else if (tok.type != TokenType.BracketOpen) throw new UnexpectedTokenException(tok); + if (!_query.DoesMatch(new SearchArgs(key, _currentDepth, localIndex))) + { + SkipValue(); + continue; + } + object? value = ParseValue(); if (value is null) throw new UnexpectedTokenException(_tokens.Current.Value); diff --git a/ParadoxSaveParser.Lib/SearchExpression.cs b/ParadoxSaveParser.Lib/SearchExpression.cs new file mode 100644 index 0000000..8e59a3f --- /dev/null +++ b/ParadoxSaveParser.Lib/SearchExpression.cs @@ -0,0 +1,141 @@ +using System.Diagnostics; +using System.Linq; + +namespace ParadoxSaveParser.Lib; + +public record SearchArgs(string key, int currentDepth, int localIndex); + +public interface ISearchExpression +{ + bool DoesMatch(SearchArgs args); +} + +public class SearchExpression : ISearchExpression +{ + private List _compiledExpression; + private int _expressionDepth; + + private SearchExpression(List compiledExpression, int expressionDepth) + { + _compiledExpression = compiledExpression; + _expressionDepth = expressionDepth; + } + + + public bool DoesMatch(SearchArgs args) + { + if (args.key is "c" or "d" or "e") + Console.WriteLine("UwU"); + int index = args.currentDepth - _expressionDepth; + if (index < 0 || index >= _compiledExpression.Count) + return true; + + return _compiledExpression[index].DoesMatch(args); + } + + + private static bool CharEqualsAndNotEscaped(char c, ReadOnlySpan chars, int i) => + chars[i] == c && (i < 1 || chars[i - 1] != '\\') && (i < 2 || chars[i - 2] != '\\'); + + public static SearchExpression Parse(string query) => ParseInternal(query, 0); + + private static SearchExpression ParseInternal(ReadOnlySpan query, int expressionDepth) + { + var compiledExpression = new List(); + ISearchExpression exprPart; + int partBegin = 0; + int bracketBalance = 0; + int expressionDepthIncrement = 0; + + for (int i = 0; i < query.Length; i++) + { + if (CharEqualsAndNotEscaped('(', query, i)) + bracketBalance++; + else if (CharEqualsAndNotEscaped(')', query, i)) + bracketBalance--; + else if (bracketBalance == 0 && CharEqualsAndNotEscaped('.', query, i)) + { + var part = query.Slice(partBegin, i - partBegin); + expressionDepthIncrement++; + exprPart = ParsePart(part, query, partBegin, + expressionDepth + expressionDepthIncrement); + compiledExpression.Add(exprPart); + partBegin = i + 1; + } + } + + exprPart = ParsePart(query.Slice(partBegin), query, partBegin, + expressionDepth + expressionDepthIncrement); + compiledExpression.Add(exprPart); + + return new SearchExpression(compiledExpression, expressionDepth); + } + + private static ISearchExpression ParsePart(ReadOnlySpan part, + ReadOnlySpan query, int partBegin, int expressionDepth) + { + if (part is "*") + { + return new AnyMatchExpression(); + } + + if (CharEqualsAndNotEscaped('[', query, partBegin)) + { + part = part.Slice(1, part.Length - 2); + return new IndexMatchExpression(int.Parse(part)); + } + + if(part[0] is '(') + { + var subExprs = new List(); + ISearchExpression subExpr; + part = part.Slice(1, part.Length - 2); + int supExprBegin = 0; + for (int j = 0; j < part.Length; j++) + { + if (CharEqualsAndNotEscaped('|', part, j)) + { + subExpr = ParseInternal(part.Slice(supExprBegin, j - supExprBegin), + expressionDepth); + subExprs.Add(subExpr); + supExprBegin = j + 1; + } + } + + subExpr = ParseInternal(part.Slice(supExprBegin), expressionDepth); + subExprs.Add(subExpr); + return new MultipleMatchExpression(subExprs); + } + + return new ExactMatchExpression(part.ToString()); + } + + private record AnyMatchExpression : ISearchExpression + { + public bool DoesMatch(SearchArgs args) => true; + } + + private record MultipleMatchExpression(List subExprs) : ISearchExpression + { + public bool DoesMatch(SearchArgs args) + { + foreach (var e in subExprs) + { + if(e.DoesMatch(args)) + return true; + } + return false; + } + } + + private record IndexMatchExpression(int index) : ISearchExpression + { + public bool DoesMatch(SearchArgs args) => args.localIndex == index; + } + + private record ExactMatchExpression(string key) : ISearchExpression + { + public bool DoesMatch(SearchArgs args) => args.key == key; + } + +} diff --git a/ParadoxSaveParser.WebAPI/Program.Responses.cs b/ParadoxSaveParser.WebAPI/Program.Responses.cs new file mode 100644 index 0000000..d6c7c3d --- /dev/null +++ b/ParadoxSaveParser.WebAPI/Program.Responses.cs @@ -0,0 +1,125 @@ +using System.IO.Compression; +using System.Linq; +using System.Net; +using DTLib.Extensions; + +namespace ParadoxSaveParser.WebAPI; + +public partial class Program +{ + // ReSharper disable once NotAccessedPositionalProperty.Global + public record ErrorMessage(string errorMessage); + + private static async Task ReturnResponse(HttpListenerContext ctx, HttpStatusCode statusCode, + object response) + { + await JsonSerializer.SerializeAsync(ctx.Response.OutputStream, response, response.GetType(), + JsonSerializerOptions.Default, _mainCancel.Token); + ctx.Response.StatusCode = (int)statusCode; + return statusCode; + } + + private static async Task ReturnResponse(HttpListenerContext ctx, HttpStatusCode statusCode, + string response) + { + await ctx.Response.OutputStream.WriteAsync(response.ToBytes(), _mainCancel.Token); + ctx.Response.StatusCode = (int)statusCode; + return statusCode; + } + + private static async Task UploadSaveHandler(HttpListenerContext ctx) + { + string? contentType = ctx.Request.Headers.GetValues("Content-Type")?.FirstOrDefault(); + if (contentType != "application/octet-stream") + return await ReturnResponse(ctx, HttpStatusCode.BadRequest, + new ErrorMessage($"Invalid request Content-Type: '{contentType}'")); + + string saveId = Guid.NewGuid().ToString(); + IOPath metaFilePath = PathHelper.GetMetaFilePath(saveId); + if (File.Exists(metaFilePath)) + return await ReturnResponse(ctx, HttpStatusCode.InternalServerError, + new ErrorMessage($"Guid collision! file' {metaFilePath}' already exists.")); + + var meta = new SaveFileMetadata + { id = saveId, game = Game.EU4, status = SaveFileProcessingStatus.Initialized, }; + if (!_saveMetadataStorage.TryAdd(saveId, meta)) + return await ReturnResponse(ctx, HttpStatusCode.InternalServerError, + new ErrorMessage($"Guid collision! Can't create metadata with id {saveId}")); + + meta.status = SaveFileProcessingStatus.Uploading; + IOPath saveFilePath = PathHelper.GetSaveFilePath(meta.id); + await using var saveFile = File.OpenWrite(saveFilePath); + await using var remoteStream = ctx.Request.InputStream; + await remoteStream.CopyToAsync(saveFile, _mainCancel.Token); + meta.status = SaveFileProcessingStatus.Uploaded; + + return await ReturnResponse(ctx, HttpStatusCode.OK, saveId); + } + + private static (SaveFileMetadata? meta, ErrorMessage? errorMesage) GetMetaFromRequestId(HttpListenerContext ctx, + string requestParamName) + { + var ids = ctx.Request.QueryString.GetValues(requestParamName); + string? id = ids?.FirstOrDefault(); + if (string.IsNullOrEmpty(id)) + return (null, new ErrorMessage($"No request parameter '{requestParamName}' provided")); + + if (!_saveMetadataStorage.TryGetValue(id, out var meta)) + return (null, new ErrorMessage($"Save with {id} not found")); + + return (meta, null); + } + + private static async Task GetSaveStatusHandler(HttpListenerContext ctx) + { + var (meta, errorMessage) = GetMetaFromRequestId(ctx, "id"); + if (errorMessage is not null) + return await ReturnResponse(ctx, HttpStatusCode.InternalServerError, errorMessage); + + return await ReturnResponse(ctx, HttpStatusCode.OK, meta!); + } + + private static async Task ParseSaveEU4Handler(HttpListenerContext ctx) + { + var (meta, errorMessage) = GetMetaFromRequestId(ctx, "id"); + if (errorMessage is not null) + return await ReturnResponse(ctx, HttpStatusCode.InternalServerError, errorMessage); + + //TODO: get actual query + string searchQuery = ""; + + try + { + using var zipArchive = ZipFile.Open(PathHelper.GetSaveFilePath(meta!.id).Str, ZipArchiveMode.Read); + var zipEntry = zipArchive.Entries.FirstOrDefault(e => e.Name == "gamestate"); + if (zipEntry is null) + return await ReturnResponse(ctx, HttpStatusCode.BadRequest, + new ErrorMessage("Invalid save format: no 'gamestate' file found")); + + string extractedGamestatePath = PathHelper.GetSaveFilePath(meta.id) + ".gamestate"; + zipEntry.ExtractToFile(extractedGamestatePath, true); + var gamestateStream = File.OpenRead(extractedGamestatePath); + + meta.status = SaveFileProcessingStatus.Parsing; + var se = SearchExpression.Parse(searchQuery); + var parser = new SaveParserEU4(gamestateStream, se); + var result = parser.Parse(); + + meta.status = SaveFileProcessingStatus.SavingResults; + IOPath resultFilePath = PathHelper.GetParsedSaveFilePath(meta.id); + await using var resultFile = File.OpenWrite(resultFilePath); + await JsonSerializer.SerializeAsync(resultFile, result, _saveSerializerOptions, _mainCancel.Token); + meta.status = SaveFileProcessingStatus.Done; + meta.SaveToFile(); + } + catch (Exception ex) + { + string errorMesage = ex.ToStringDemystified(); + _loggerRoot.LogWarn(nameof(ParseSaveEU4Handler), errorMesage); + return await ReturnResponse(ctx, HttpStatusCode.BadRequest, + new ErrorMessage(errorMesage)); + } + + return await ReturnResponse(ctx, HttpStatusCode.OK, meta); + } +} \ No newline at end of file diff --git a/ParadoxSaveParser.WebAPI/Program.cs b/ParadoxSaveParser.WebAPI/Program.cs index 6b70f2f..ec25851 100644 --- a/ParadoxSaveParser.WebAPI/Program.cs +++ b/ParadoxSaveParser.WebAPI/Program.cs @@ -13,9 +13,6 @@ global using File = DTLib.Filesystem.File; global using Path = DTLib.Filesystem.Path; using System.Collections.Concurrent; using System.IO; -using System.IO.Compression; -using System.Linq; -using System.Net; using System.Text.Encodings.Web; using DTLib.Dtsod; using DTLib.Extensions; @@ -24,7 +21,7 @@ using DTLib.Web.Routes; namespace ParadoxSaveParser.WebAPI; -public class Program +public partial class Program { private static readonly IOPath _configPath = "./config.dtsod"; private static Config _config = new(); @@ -43,9 +40,64 @@ public class Program MaxDepth = 1024, }; + static void TestSearchExpression(Stream saveStream, TestCase tc) + { + saveStream.Seek(0, SeekOrigin.Begin); + var se = SearchExpression.Parse(tc.q); + var parser = new SaveParserEU4(saveStream, se); + var rootNode = parser.Parse(); + string json = JsonSerializer.Serialize(rootNode, _saveSerializerOptions); + string pdx = json.Substring(1, json.Length - 2) + .Replace(",", " ").Replace("{", "{ ").Replace("}", " }") + .Replace("\"", "").Replace("[", "").Replace("]", "").Replace(":", "="); + if(pdx == tc.a) + { + Console.WriteLine($"[OK] q:'{tc.q}' a:'{tc.a}'"); + } + else + { + Console.WriteLine($"[Error] q:'{tc.q}' a:'{tc.a}' r:'{pdx}'"); + } + } + record TestCase(string q, string a); + public static void Main(string[] args) { + + using var saveStream = new MemoryStream( + "EU4txt a={ b={ c=0 d=1 e=2 } f=3 }".ToBytes(), + false); + + TestCase[] testCases = [ + new("a", + "a={ b={ c=0 d=1 e=2 } f=3 }"), + + new("a.*", + "a={ b={ c=0 d=1 e=2 } f=3 }"), + + new("a.b", + "a={ b={ c=0 d=1 e=2 } }"), + + new("a.[0].c", + "a={ b={ c=0 } }"), + + new("a.[1]", + "a={ f=3 }"), + + new("a.b.(c|d)", + "a={ b={ c=0 d=1 } }"), + + new("a.(b.e|f)", + "a={ b={ e=2 } f=3 }"), + ]; + + foreach (var test in testCases) + { + TestSearchExpression(saveStream, test); + } + +/* Console.InputEncoding = Encoding.UTF8; Console.OutputEncoding = Encoding.UTF8; Console.CursorVisible = false; @@ -88,6 +140,7 @@ public class Program { logger.LogError(ex.ToStringDemystified()); } +*/ } public static void PrepareLocalFiles() @@ -111,111 +164,4 @@ public class Program } } - // ReSharper disable once NotAccessedPositionalProperty.Global - public record ErrorMessage(string errorMessage); - - private static async TaskReturnResponse(HttpListenerContext ctx, HttpStatusCode statusCode, object response) - { - await JsonSerializer.SerializeAsync(ctx.Response.OutputStream, response, response.GetType(), - JsonSerializerOptions.Default, _mainCancel.Token); - ctx.Response.StatusCode = (int)statusCode; - return statusCode; - } - - private static async TaskReturnResponse(HttpListenerContext ctx, HttpStatusCode statusCode, string response) - { - await ctx.Response.OutputStream.WriteAsync(response.ToBytes(), _mainCancel.Token); - ctx.Response.StatusCode = (int)statusCode; - return statusCode; - } - - private static async TaskUploadSaveHandler(HttpListenerContext ctx) - { - string? contentType = ctx.Request.Headers.GetValues("Content-Type")?.FirstOrDefault(); - if (contentType != "application/octet-stream") - return await ReturnResponse(ctx, HttpStatusCode.BadRequest, - new ErrorMessage($"Invalid request Content-Type: '{contentType}'")); - - string saveId = Guid.NewGuid().ToString(); - IOPath metaFilePath = PathHelper.GetMetaFilePath(saveId); - if (File.Exists(metaFilePath)) - return await ReturnResponse(ctx, HttpStatusCode.InternalServerError, - new ErrorMessage($"Guid collision! file' {metaFilePath}' already exists.")); - - var meta = new SaveFileMetadata { id = saveId, game = Game.EU4, status = SaveFileProcessingStatus.Initialized, }; - if (!_saveMetadataStorage.TryAdd(saveId, meta)) - return await ReturnResponse(ctx, HttpStatusCode.InternalServerError, - new ErrorMessage($"Guid collision! Can't create metadata with id {saveId}")); - - meta.status = SaveFileProcessingStatus.Uploading; - IOPath saveFilePath = PathHelper.GetSaveFilePath(meta.id); - await using var saveFile = File.OpenWrite(saveFilePath); - await using var remoteStream = ctx.Request.InputStream; - await remoteStream.CopyToAsync(saveFile, _mainCancel.Token); - meta.status = SaveFileProcessingStatus.Uploaded; - - return await ReturnResponse(ctx, HttpStatusCode.OK, saveId); - } - - private static (SaveFileMetadata? meta, ErrorMessage? errorMesage) GetMetaFromRequestId(HttpListenerContext ctx, string requestParamName) - { - var ids = ctx.Request.QueryString.GetValues(requestParamName); - string? id = ids?.FirstOrDefault(); - if (string.IsNullOrEmpty(id)) - return (null, new ErrorMessage($"No request parameter '{requestParamName}' provided")); - - if (!_saveMetadataStorage.TryGetValue(id, out var meta)) - return (null,new ErrorMessage($"Save with {id} not found")); - - return (meta, null); - } - - private static async TaskGetSaveStatusHandler(HttpListenerContext ctx) - { - var (meta, errorMessage) = GetMetaFromRequestId(ctx, "id"); - if(errorMessage is not null) - return await ReturnResponse(ctx, HttpStatusCode.InternalServerError, errorMessage); - - return await ReturnResponse(ctx, HttpStatusCode.OK, meta!); - } - - private static async TaskParseSaveEU4Handler(HttpListenerContext ctx) - { - var (meta, errorMessage) = GetMetaFromRequestId(ctx, "id"); - if(errorMessage is not null) - return await ReturnResponse(ctx, HttpStatusCode.InternalServerError, errorMessage); - - try - { - using var zipArchive = ZipFile.Open(PathHelper.GetSaveFilePath(meta!.id).Str, ZipArchiveMode.Read); - var zipEntry = zipArchive.Entries.FirstOrDefault(e => e.Name == "gamestate"); - if (zipEntry is null) - return await ReturnResponse(ctx, HttpStatusCode.BadRequest, - new ErrorMessage("Invalid save format: no 'gamestate' file found")); - - string extractedGamestatePath = PathHelper.GetSaveFilePath(meta.id) + ".gamestate"; - zipEntry.ExtractToFile(extractedGamestatePath, true); - var gamestateStream = File.OpenRead(extractedGamestatePath); - - meta.status = SaveFileProcessingStatus.Parsing; - var parser = new Parser(gamestateStream); - var result = parser.Parse(); - - meta.status = SaveFileProcessingStatus.SavingResults; - IOPath resultFilePath = PathHelper.GetParsedSaveFilePath(meta.id); - await using var resultFile = File.OpenWrite(resultFilePath); - await JsonSerializer.SerializeAsync(resultFile, result, _saveSerializerOptions, _mainCancel.Token); - meta.status = SaveFileProcessingStatus.Done; - meta.SaveToFile(); - } - catch (Exception ex) - { - string errorMesage = ex.ToStringDemystified(); - _loggerRoot.LogWarn(nameof(ParseSaveEU4Handler), errorMesage); - return await ReturnResponse(ctx, HttpStatusCode.BadRequest, - new ErrorMessage(errorMesage)); - } - - return await ReturnResponse(ctx, HttpStatusCode.OK, meta); - } } \ No newline at end of file