From f69b498cafb8d8a5c5f5eed33386d3102ef17544 Mon Sep 17 00:00:00 2001 From: Timerix Date: Sat, 22 Mar 2025 18:14:32 +0500 Subject: [PATCH] fixed many bugs in parser --- .../ParadoxSaveParser.Lib.csproj | 3 +- ParadoxSaveParser.Lib/Parser.cs | 118 +++++++++++++++--- .../ParadoxSaveParser.WebAPI.csproj | 16 ++- ParadoxSaveParser.WebAPI/Program.cs | 34 ++--- 4 files changed, 134 insertions(+), 37 deletions(-) diff --git a/ParadoxSaveParser.Lib/ParadoxSaveParser.Lib.csproj b/ParadoxSaveParser.Lib/ParadoxSaveParser.Lib.csproj index 595335a..f063c0b 100644 --- a/ParadoxSaveParser.Lib/ParadoxSaveParser.Lib.csproj +++ b/ParadoxSaveParser.Lib/ParadoxSaveParser.Lib.csproj @@ -1,8 +1,7 @@  - net8.0 - enable + disable enable diff --git a/ParadoxSaveParser.Lib/Parser.cs b/ParadoxSaveParser.Lib/Parser.cs index ae85609..61f3c3a 100644 --- a/ParadoxSaveParser.Lib/Parser.cs +++ b/ParadoxSaveParser.Lib/Parser.cs @@ -1,11 +1,14 @@ -using System.Text; +global using System; +global using System.IO; +global using System.Text; +global using System.Collections.Generic; namespace ParadoxSaveParser.Lib; public class Parser { protected Stream _saveFile; - private List _tokens = new(); + private List _tokens = new(4_194_304); private int _tokenIndex; public Parser(Stream savefile) @@ -13,7 +16,7 @@ public class Parser _saveFile = savefile; } - protected enum TokenType + protected enum TokenType : byte { Invalid, String, @@ -25,67 +28,119 @@ public class Parser protected struct Token { public TokenType type; + public short column; + public int line; public string? value; public override string ToString() { + string s; switch (type) { case TokenType.Invalid: - return "INVALID_TOKEN"; + s = "INVALID_TOKEN"; + break; case TokenType.String: - return value ?? "NULL"; + s = value ?? "NULL"; + break; case TokenType.Equals: - return "="; + s = "="; + break; case TokenType.BracketOpen: - return "{"; + s = "{"; + break; case TokenType.BracketClose: - return "}"; + s = "}"; + break; default: throw new ArgumentOutOfRangeException(type.ToString()); } + + return $"{line}:{column} '{s}'"; } } protected void Lex() { _tokens.Clear(); + + string expectedHeader = "EU4txt"; + byte[] headBytes = new byte[expectedHeader.Length]; + _saveFile.ReadExactly(headBytes); + string headStr = Encoding.UTF8.GetString(headBytes); + if (headStr != expectedHeader) + throw new Exception($"Invalid gamestate header: '{headStr}'"); + StringBuilder str = new(); + int line = 2; + int column = 0; + bool isQuoteOpen = false; + bool isStrInQuotes = false; void CompleteStringToken() { - if (str.Length > 0 && str[0] != '#') + if (isQuoteOpen) + return; + // strings in quotes can be empty + if (!isStrInQuotes && (str.Length <= 0 || str[0] == '#')) + return; + _tokens.Add(new Token { - _tokens.Add(new Token { type = TokenType.String, value = str.ToString() }); - str.Clear(); - } + type = TokenType.String, + column = (short)(column - str.Length), + line = line, + value = str.ToString() + }); + str.Clear(); + isStrInQuotes = false; } while (_saveFile.CanRead) { int c = _saveFile.ReadByte(); + column++; switch (c) { case -1: CompleteStringToken(); return; + case '\"': + isQuoteOpen = !isQuoteOpen; + isStrInQuotes = true; + break; case ' ': case '\t': - case '\n': case '\r': CompleteStringToken(); break; + case '\n': + CompleteStringToken(); + line++; + column = 0; + break; case '=': CompleteStringToken(); - _tokens.Add(new Token { type = TokenType.Equals }); + _tokens.Add(new Token + { + type = TokenType.Equals, + line = line, column = (short)column + }); break; case '{': CompleteStringToken(); - _tokens.Add(new Token { type = TokenType.BracketOpen }); + _tokens.Add(new Token + { + type = TokenType.BracketOpen, + line = line, column = (short)column + }); break; case '}': CompleteStringToken(); - _tokens.Add(new Token { type = TokenType.BracketClose }); + _tokens.Add(new Token + { + type = TokenType.BracketClose, + line = line, column = (short)column + }); break; default: str.Append((char)c); @@ -131,8 +186,7 @@ public class Parser private List ParseList() { List list = new(); - Token tok = _tokens[_tokenIndex]; - while (tok.type != TokenType.BracketClose) + while(true) { object? value = ParseValue(); if (value == null) @@ -150,16 +204,42 @@ public class Parser while (_tokenIndex < _tokens.Count) { Token tok = _tokens[_tokenIndex++]; + // end of dictionary if (tok.type == TokenType.BracketClose) break; + // Saves may contain some blocks without key. + // Such blocks are skipped because idk where to put them. + // Example: `technology_group=tech_cannorian{ } + // { } { } { }` + if (tok.type == TokenType.BracketOpen) + { + int bracketBalance = 1; + while (bracketBalance != 0) + { + tok = _tokens[_tokenIndex++]; + if (tok.type == TokenType.BracketOpen) + bracketBalance++; + else if (tok.type == TokenType.BracketClose) + bracketBalance--; + } + + continue; + } + if(tok.type != TokenType.String) throw new UnexpectedTokenException(tok, _tokenIndex - 1); string key = tok.value!; tok = _tokens[_tokenIndex++]; - if(tok.type != TokenType.Equals) + if (tok.type == TokenType.BracketOpen) + { + // Saves may contain key-value definition without `=`. + // Example: `map_area_data{` instead of `map_area_data = {` + _tokenIndex--; + } + else if(tok.type != TokenType.Equals) throw new UnexpectedTokenException(tok, _tokenIndex - 1); diff --git a/ParadoxSaveParser.WebAPI/ParadoxSaveParser.WebAPI.csproj b/ParadoxSaveParser.WebAPI/ParadoxSaveParser.WebAPI.csproj index 0199c4b..60e5306 100644 --- a/ParadoxSaveParser.WebAPI/ParadoxSaveParser.WebAPI.csproj +++ b/ParadoxSaveParser.WebAPI/ParadoxSaveParser.WebAPI.csproj @@ -1,5 +1,4 @@ - net8.0 enable @@ -15,4 +14,19 @@ + + + + + + + + + + + + + + + diff --git a/ParadoxSaveParser.WebAPI/Program.cs b/ParadoxSaveParser.WebAPI/Program.cs index dbf734f..42b3669 100644 --- a/ParadoxSaveParser.WebAPI/Program.cs +++ b/ParadoxSaveParser.WebAPI/Program.cs @@ -1,5 +1,7 @@ global using System; global using System.IO; +global using System.Collections.Generic; +global using System.Text; global using System.Text.Json; global using System.Threading.Tasks; global using DTLib.Demystifier; @@ -7,7 +9,7 @@ global using ParadoxSaveParser.Lib; using System.Collections.Concurrent; using System.IO.Compression; using System.Linq; -using System.Text; +using System.Text.Encodings.Web; using Microsoft.AspNetCore.Builder; using Microsoft.AspNetCore.Http; using Microsoft.Extensions.Logging; @@ -18,7 +20,14 @@ public class Program { private static ConcurrentDictionary _saveMetadataStorage = new(); private static WebApplication _app = null!; - + + private static JsonSerializerOptions _saveSerializerOptions = new() + { + WriteIndented = false, + Encoder = JavaScriptEncoder.UnsafeRelaxedJsonEscaping, + MaxDepth = 1024, + }; + public static void Main(string[] args) { var builder = WebApplication.CreateBuilder(args); @@ -116,31 +125,25 @@ public class Program try { - if(meta.status != SaveFileProcessingStatus.Uploaded) + if (meta.status != SaveFileProcessingStatus.Uploaded) throw new Exception($"Invalid save processing status: {meta.status}"); - + using var zipArchive = ZipFile.Open(PathHelper.GetSaveFilePath(meta.id), ZipArchiveMode.Read); var zipEntry = zipArchive.Entries.FirstOrDefault(e => e.Name == "gamestate"); - if(zipEntry is null) + if (zipEntry is null) throw new Exception("Invalid save format: no gamestate file found"); string extractedGamestatePath = PathHelper.GetSaveFilePath(meta.id) + ".gamestate"; zipEntry.ExtractToFile(extractedGamestatePath); var gamestateStream = File.Open(extractedGamestatePath, FileMode.Open, FileAccess.Read); - + meta.status = SaveFileProcessingStatus.Parsing; - string expectedHeader = "EU4txt"; - byte[] headBytes = new byte[expectedHeader.Length]; - gamestateStream.ReadExactly(headBytes); - string headStr = Encoding.UTF8.GetString(headBytes); - if(headStr != expectedHeader) - throw new Exception($"Invalid gamestate header: '{headStr}'"); var parser = new Parser(gamestateStream); var result = parser.Parse(); - + meta.status = SaveFileProcessingStatus.SavingResults; string resultFilePath = PathHelper.GetParsedSaveFilePath(meta.id); await using var resultFile = File.Open(resultFilePath, FileMode.CreateNew, FileAccess.Write); - await JsonSerializer.SerializeAsync(resultFile, result); + await JsonSerializer.SerializeAsync(resultFile, result, _saveSerializerOptions); meta.status = SaveFileProcessingStatus.Done; meta.SaveToFile(); } @@ -152,7 +155,8 @@ public class Program httpContext.Response.StatusCode = StatusCodes.Status500InternalServerError; _app.Logger.Log(LogLevel.Error, "ParseSaveEU4 Error: {errorMesage}", errorMesage); } - + + GC.Collect(); await httpContext.Response.WriteAsJsonAsync(meta); } } \ No newline at end of file