From af344e2ea10cccf72f87ab832150f3203ee3d99f Mon Sep 17 00:00:00 2001 From: rampaa Date: Fri, 15 Dec 2023 20:28:12 +0300 Subject: [PATCH] Minor --- JL.Core/Dicts/EPWING/EpwingUtils.cs | 42 +++---- JL.Core/Dicts/EPWING/IEpwingRecord.cs | 2 +- .../Dicts/EPWING/Nazeka/EpwingNazekaLoader.cs | 25 ++--- .../Dicts/EPWING/Nazeka/EpwingNazekaRecord.cs | 2 +- .../EPWING/Yomichan/EpwingYomichanLoader.cs | 69 +++++++++--- .../EPWING/Yomichan/EpwingYomichanRecord.cs | 105 +++++------------- 6 files changed, 117 insertions(+), 128 deletions(-) diff --git a/JL.Core/Dicts/EPWING/EpwingUtils.cs b/JL.Core/Dicts/EPWING/EpwingUtils.cs index f54b0ff1..6df1ceb1 100644 --- a/JL.Core/Dicts/EPWING/EpwingUtils.cs +++ b/JL.Core/Dicts/EPWING/EpwingUtils.cs @@ -25,9 +25,9 @@ internal static class EpwingUtils '→', '━' }; - public static bool IsValidEpwingResultForDictType(IEpwingRecord epwingRecord, Dict dict) + public static bool IsValidEpwingResultForDictType(string primarySpelling, string? reading, string[] definitions, Dict dict) { - foreach (char c in epwingRecord.PrimarySpelling) + foreach (char c in primarySpelling) { if (s_invalidCharacters.Contains(c) || char.IsWhiteSpace(c)) { @@ -40,13 +40,13 @@ public static bool IsValidEpwingResultForDictType(IEpwingRecord epwingRecord, Di case DictType.Kenkyuusha: if ((dict.Options?.Examples?.Value ?? ExamplesOptionValue.None) is ExamplesOptionValue.None) { - if (epwingRecord.Definitions.Length > 2) + if (definitions.Length > 2) { - for (int i = 2; i < epwingRecord.Definitions.Length; i++) + for (int i = 2; i < definitions.Length; i++) { - if (!char.IsDigit(epwingRecord.Definitions[i][0])) + if (!char.IsDigit(definitions[i][0])) { - epwingRecord.Definitions = epwingRecord.Definitions.RemoveAt(i); + definitions = definitions.RemoveAt(i); --i; } } @@ -54,13 +54,13 @@ public static bool IsValidEpwingResultForDictType(IEpwingRecord epwingRecord, Di } else if (dict.Options is { Examples.Value: ExamplesOptionValue.One }) { - if (epwingRecord.Definitions.Length > 2) + if (definitions.Length > 2) { bool isMainExample = true; - for (int i = 2; i < epwingRecord.Definitions.Length; i++) + for (int i = 2; i < definitions.Length; i++) { - if (char.IsDigit(epwingRecord.Definitions[i][0])) + if (char.IsDigit(definitions[i][0])) { isMainExample = true; } @@ -69,7 +69,7 @@ public static bool IsValidEpwingResultForDictType(IEpwingRecord epwingRecord, Di { if (!isMainExample) { - epwingRecord.Definitions = epwingRecord.Definitions.RemoveAt(i); + definitions = definitions.RemoveAt(i); --i; } @@ -79,13 +79,13 @@ public static bool IsValidEpwingResultForDictType(IEpwingRecord epwingRecord, Di } } - epwingRecord.Definitions = epwingRecord.Definitions.Select(static def => def.Replace("┏", "", StringComparison.Ordinal)).ToArray(); + definitions = definitions.Select(static def => def.Replace("┏", "", StringComparison.Ordinal)).ToArray(); break; case DictType.Daijisen: // Kanji definitions - if (epwingRecord.Definitions.Any(static def => def.Contains("[音]", StringComparison.Ordinal))) + if (definitions.Any(static def => def.Contains("[音]", StringComparison.Ordinal))) { return false; } @@ -93,30 +93,30 @@ public static bool IsValidEpwingResultForDictType(IEpwingRecord epwingRecord, Di break; } - return FilterDuplicateEntries(epwingRecord, dict); + return FilterDuplicateEntries(primarySpelling, reading, definitions, dict); } - private static bool FilterDuplicateEntries(IEpwingRecord epwingRecord, Dict dict) + private static bool FilterDuplicateEntries(string primarySpelling, string? reading, string[] definitions, Dict dict) { if (dict.Contents.TryGetValue( - JapaneseUtils.KatakanaToHiragana(epwingRecord.PrimarySpelling), + JapaneseUtils.KatakanaToHiragana(primarySpelling), out IList? previousResults)) { for (int i = 0; i < previousResults.Count; i++) { IEpwingRecord previousResult = (IEpwingRecord)previousResults[i]; - if (previousResult.Definitions.SequenceEqual(epwingRecord.Definitions)) + if (previousResult.Definitions.SequenceEqual(definitions)) { // If an entry has reading info while others don't, keep the one with the reading info. if (string.IsNullOrEmpty(previousResult.Reading) && - !string.IsNullOrEmpty(epwingRecord.Reading)) + !string.IsNullOrEmpty(reading)) { previousResults.RemoveAt(i); break; } - if (epwingRecord.Reading == previousResult.Reading) + if (reading == previousResult.Reading) { return false; } @@ -124,15 +124,15 @@ private static bool FilterDuplicateEntries(IEpwingRecord epwingRecord, Dict dict } } - else if (epwingRecord.Reading is not null && dict.Contents.TryGetValue( - JapaneseUtils.KatakanaToHiragana(epwingRecord.Reading), + else if (reading is not null && dict.Contents.TryGetValue( + JapaneseUtils.KatakanaToHiragana(reading), out previousResults)) { for (int i = 0; i < previousResults.Count; i++) { IEpwingRecord previousResult = (IEpwingRecord)previousResults[i]; - if (previousResult.Definitions.SequenceEqual(epwingRecord.Definitions)) + if (previousResult.Definitions.SequenceEqual(definitions)) { if (string.IsNullOrEmpty(previousResult.Reading)) { diff --git a/JL.Core/Dicts/EPWING/IEpwingRecord.cs b/JL.Core/Dicts/EPWING/IEpwingRecord.cs index fd9ea104..ceeb753f 100644 --- a/JL.Core/Dicts/EPWING/IEpwingRecord.cs +++ b/JL.Core/Dicts/EPWING/IEpwingRecord.cs @@ -4,5 +4,5 @@ internal interface IEpwingRecord : IDictRecord { public string PrimarySpelling { get; } public string? Reading { get; } - public string[] Definitions { get; set; } + public string[] Definitions { get; } } diff --git a/JL.Core/Dicts/EPWING/Nazeka/EpwingNazekaLoader.cs b/JL.Core/Dicts/EPWING/Nazeka/EpwingNazekaLoader.cs index c0580332..83f5eeba 100644 --- a/JL.Core/Dicts/EPWING/Nazeka/EpwingNazekaLoader.cs +++ b/JL.Core/Dicts/EPWING/Nazeka/EpwingNazekaLoader.cs @@ -73,17 +73,16 @@ public static async Task Load(Dict dict) string primarySpelling = spellingList[0]; string[]? alternativeSpellings = spellingList.RemoveAtToArray(0); - EpwingNazekaRecord tempRecord = new(primarySpelling, reading, alternativeSpellings, definitions); - if (!EpwingUtils.IsValidEpwingResultForDictType(tempRecord, dict)) + if (!EpwingUtils.IsValidEpwingResultForDictType(primarySpelling, reading, definitions, dict)) { continue; } - AddRecordToDictionary(primarySpelling, tempRecord, nazekaEpwingDict); - - if (dict.Type is not DictType.NonspecificNameNazeka) + EpwingNazekaRecord record = new(primarySpelling, reading, alternativeSpellings, definitions); + AddRecordToDictionary(primarySpelling, record, nazekaEpwingDict); + if (dict.Type is not DictType.NonspecificNameNazeka and not DictType.NonspecificKanjiNazeka) { - AddRecordToDictionary(reading, tempRecord, nazekaEpwingDict); + AddRecordToDictionary(reading, record, nazekaEpwingDict); } for (int i = 1; i < spellingList.Count; i++) @@ -91,26 +90,24 @@ public static async Task Load(Dict dict) primarySpelling = spellingList[i]; alternativeSpellings = spellingList.RemoveAtToArray(i); - tempRecord = new EpwingNazekaRecord(primarySpelling, reading, alternativeSpellings, definitions); - if (!EpwingUtils.IsValidEpwingResultForDictType(tempRecord, dict)) + if (!EpwingUtils.IsValidEpwingResultForDictType(primarySpelling, reading, definitions, dict)) { continue; } - AddRecordToDictionary(primarySpelling, tempRecord, nazekaEpwingDict); + AddRecordToDictionary(primarySpelling, new EpwingNazekaRecord(primarySpelling, reading, alternativeSpellings, definitions), nazekaEpwingDict); } } else { - EpwingNazekaRecord tempRecord = new(reading, null, null, definitions); - - if (!EpwingUtils.IsValidEpwingResultForDictType(tempRecord, dict)) + if (!EpwingUtils.IsValidEpwingResultForDictType(reading, null, definitions, dict)) { continue; } - AddRecordToDictionary(reading, tempRecord, nazekaEpwingDict); + EpwingNazekaRecord record = new(reading, null, null, definitions); + AddRecordToDictionary(reading, record, nazekaEpwingDict); } } @@ -122,7 +119,7 @@ public static async Task Load(Dict dict) dict.Contents.TrimExcess(); } - private static void AddRecordToDictionary(string key, EpwingNazekaRecord record, Dictionary> dictionary) + private static void AddRecordToDictionary(string key, IDictRecord record, Dictionary> dictionary) { string keyInHiragana = JapaneseUtils.KatakanaToHiragana(key).GetPooledString(); if (dictionary.TryGetValue(keyInHiragana, out IList? result)) diff --git a/JL.Core/Dicts/EPWING/Nazeka/EpwingNazekaRecord.cs b/JL.Core/Dicts/EPWING/Nazeka/EpwingNazekaRecord.cs index 81b577ab..df7e238a 100644 --- a/JL.Core/Dicts/EPWING/Nazeka/EpwingNazekaRecord.cs +++ b/JL.Core/Dicts/EPWING/Nazeka/EpwingNazekaRecord.cs @@ -11,7 +11,7 @@ internal sealed class EpwingNazekaRecord : IEpwingRecord, IGetFrequency public string PrimarySpelling { get; } public string? Reading { get; } public string[]? AlternativeSpellings { get; } - public string[] Definitions { get; set; } + public string[] Definitions { get; } public EpwingNazekaRecord(string primarySpelling, string? reading, string[]? alternativeSpellings, string[] definitions) { diff --git a/JL.Core/Dicts/EPWING/Yomichan/EpwingYomichanLoader.cs b/JL.Core/Dicts/EPWING/Yomichan/EpwingYomichanLoader.cs index c6d52273..5a61b7ba 100644 --- a/JL.Core/Dicts/EPWING/Yomichan/EpwingYomichanLoader.cs +++ b/JL.Core/Dicts/EPWING/Yomichan/EpwingYomichanLoader.cs @@ -19,24 +19,73 @@ public static async Task Load(Dict dict) foreach (string jsonFile in jsonFiles) { - List>? jsonObjects; + List>? jsonElementLists; FileStream fileStream = File.OpenRead(jsonFile); await using (fileStream.ConfigureAwait(false)) { - jsonObjects = await JsonSerializer + jsonElementLists = await JsonSerializer .DeserializeAsync>>(fileStream) .ConfigureAwait(false); } - if (jsonObjects is null) + if (jsonElementLists is null) { continue; } - foreach (List jsonObj in jsonObjects) + foreach (List jsonElements in jsonElementLists) { - AddToDictionary(new EpwingYomichanRecord(jsonObj), dict); + string primarySpelling = jsonElements[0].GetString()!.GetPooledString(); + string? reading = jsonElements[1].GetString(); + if (string.IsNullOrEmpty(reading) || reading == primarySpelling) + { + reading = null; + } + else + { + reading = reading.GetPooledString(); + } + + string[]? definitions = EpwingYomichanRecord.GetDefinitions(jsonElements[5]); + definitions?.DeduplicateStringsInArray(); + + if (definitions is null + || !EpwingUtils.IsValidEpwingResultForDictType(primarySpelling, reading, definitions, dict)) + { + continue; + } + + string[]? definitionTags = null; + JsonElement definitionTagsElement = jsonElements[2]; + if (definitionTagsElement.ValueKind is JsonValueKind.String) + { + definitionTags = definitionTagsElement.GetString()!.Split(' ', StringSplitOptions.TrimEntries | StringSplitOptions.RemoveEmptyEntries); + if (definitionTags.Length is 0) + { + definitionTags = null; + } + else + { + definitionTags.DeduplicateStringsInArray(); + } + } + + string[]? wordClasses = jsonElements[3].GetString()!.Split(' ', StringSplitOptions.TrimEntries | StringSplitOptions.RemoveEmptyEntries); + if (wordClasses.Length is 0) + { + wordClasses = null; + } + else + { + wordClasses.DeduplicateStringsInArray(); + } + + //jsonElements[4].TryGetInt32(out int score); + //jsonElements[6].TryGetInt32(out int sequence); + //string[] termTags = jsonElements[7].ToString(); + + AddToDictionary(new EpwingYomichanRecord(primarySpelling, reading, definitions, wordClasses, definitionTags), dict); } } @@ -50,14 +99,7 @@ public static async Task Load(Dict dict) private static void AddToDictionary(IEpwingRecord yomichanRecord, Dict dict) { - if (yomichanRecord.Definitions.Length is 0 - || !EpwingUtils.IsValidEpwingResultForDictType(yomichanRecord, dict)) - { - return; - } - string hiraganaExpression = JapaneseUtils.KatakanaToHiragana(yomichanRecord.PrimarySpelling); - if (dict.Contents.TryGetValue(hiraganaExpression, out IList? records)) { records.Add(yomichanRecord); @@ -67,7 +109,8 @@ private static void AddToDictionary(IEpwingRecord yomichanRecord, Dict dict) dict.Contents[hiraganaExpression] = new List { yomichanRecord }; } - if (dict.Type is not DictType.NonspecificNameYomichan && !string.IsNullOrEmpty(yomichanRecord.Reading)) + if (dict.Type is not DictType.NonspecificNameYomichan and not DictType.NonspecificKanjiWithWordSchemaYomichan and not DictType.KanjigenYomichan + && !string.IsNullOrEmpty(yomichanRecord.Reading)) { string hiraganaReading = JapaneseUtils.KatakanaToHiragana(yomichanRecord.Reading); diff --git a/JL.Core/Dicts/EPWING/Yomichan/EpwingYomichanRecord.cs b/JL.Core/Dicts/EPWING/Yomichan/EpwingYomichanRecord.cs index 0610faf4..0dd6a892 100644 --- a/JL.Core/Dicts/EPWING/Yomichan/EpwingYomichanRecord.cs +++ b/JL.Core/Dicts/EPWING/Yomichan/EpwingYomichanRecord.cs @@ -11,7 +11,7 @@ internal sealed class EpwingYomichanRecord : IEpwingRecord, IGetFrequency { public string PrimarySpelling { get; } public string? Reading { get; } - public string[] Definitions { get; set; } + public string[] Definitions { get; } public string[]? WordClasses { get; } public string[]? DefinitionTags { get; } //public int Score { get; } @@ -27,64 +27,6 @@ public EpwingYomichanRecord(string primarySpelling, string? reading, string[] de DefinitionTags = definitionTags; } - public EpwingYomichanRecord(List jsonElement) - { - PrimarySpelling = jsonElement[0].GetString()!.GetPooledString(); - Reading = jsonElement[1].GetString(); - - if (string.IsNullOrEmpty(Reading) || Reading == PrimarySpelling) - { - Reading = null; - } - - else - { - Reading = Reading.GetPooledString(); - } - - JsonElement definitionTagsElement = jsonElement[2]; - if (definitionTagsElement.ValueKind is JsonValueKind.String) - { - DefinitionTags = definitionTagsElement.GetString()!.Split(' ', StringSplitOptions.TrimEntries | StringSplitOptions.RemoveEmptyEntries); - - if (DefinitionTags.Length is 0) - { - DefinitionTags = null; - } - - else - { - DefinitionTags.DeduplicateStringsInArray(); - } - } - else - { - DefinitionTags = null; - } - - WordClasses = jsonElement[3].GetString()!.Split(' ', StringSplitOptions.TrimEntries | StringSplitOptions.RemoveEmptyEntries); - if (WordClasses.Length is 0) - { - WordClasses = null; - } - - else - { - WordClasses.DeduplicateStringsInArray(); - } - - //jsonElement[4].TryGetInt32(out int score); - //Score = score; - - Definitions = GetDefinitions(jsonElement[5]) ?? Array.Empty(); - Definitions.DeduplicateStringsInArray(); - - //jsonElement[6].TryGetInt32(out int sequence); - //Sequence = sequence; - - //TermTags = jsonElement[7].ToString(); - } - public string BuildFormattedDefinition(DictOptions? options) { StringBuilder defResult = new(); @@ -185,7 +127,7 @@ public int GetFrequencyFromDB(Dictionary> freqDict return frequency; } - private static string[]? GetDefinitions(JsonElement jsonElement) + public static string[]? GetDefinitions(JsonElement jsonElement) { List definitions = new(); foreach (JsonElement definitionElement in jsonElement.EnumerateArray()) @@ -268,31 +210,38 @@ public int GetFrequencyFromDB(Dictionary> freqDict private static YomichanContent GetDefinitionsFromJsonObject(JsonElement jsonElement, string? parentTag = null) { - if (jsonElement.TryGetProperty("content", out JsonElement contentElement)) + JsonElement currentJsonElement = jsonElement; + string? currentParentTag = parentTag; + while (true) { - string? tag = null; - if (jsonElement.TryGetProperty("tag", out JsonElement tagElement)) + if (currentJsonElement.TryGetProperty("content", out JsonElement contentElement)) { - tag = tagElement.GetString(); - } + string? tag = null; + if (currentJsonElement.TryGetProperty("tag", out JsonElement tagElement)) + { + tag = tagElement.GetString(); + } - if (contentElement.ValueKind is JsonValueKind.String) - { - return new YomichanContent(parentTag ?? tag, contentElement.GetString()!.Trim()); - } + if (contentElement.ValueKind is JsonValueKind.String) + { + return new YomichanContent(currentParentTag ?? tag, contentElement.GetString()!.Trim()); + } - if (contentElement.ValueKind is JsonValueKind.Array) - { - return new YomichanContent(parentTag ?? tag, GetDefinitionsFromJsonArray(contentElement, tag)); - } + if (contentElement.ValueKind is JsonValueKind.Array) + { + return new YomichanContent(currentParentTag ?? tag, GetDefinitionsFromJsonArray(contentElement, tag)); + } - if (contentElement.ValueKind is JsonValueKind.Object) - { - return GetDefinitionsFromJsonObject(contentElement, parentTag ?? tag); + if (contentElement.ValueKind is JsonValueKind.Object) + { + currentJsonElement = contentElement; + currentParentTag ??= tag; + continue; + } } - } - return default; + return default; + } } public override bool Equals(object? obj)