Skip to content

Commit

Permalink
Add "Pitch Accent (Numeric)" field for mining (partially implements #72)
Browse files Browse the repository at this point in the history
  • Loading branch information
rampaa committed Feb 3, 2024
1 parent 41f48f0 commit 4749dce
Show file tree
Hide file tree
Showing 4 changed files with 95 additions and 0 deletions.
1 change: 1 addition & 0 deletions JL.Core/Mining/JLField.cs
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@ public enum JLField
[Description("Local Time")] LocalTime,
Frequencies,
[Description("Raw Frequencies")] RawFrequencies,
[Description("Pitch Accents (Numeric)")] NumericPitchAccents,

// JMdict, JMnedict, KANJIDIC2
[Description("EDICT ID")] EdictId,
Expand Down
3 changes: 3 additions & 0 deletions JL.Core/Mining/JLFieldUtils.cs
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@ public static class JLFieldUtils
JLField.DeconjugationProcess,
JLField.Frequencies,
JLField.RawFrequencies,
JLField.NumericPitchAccents,
JLField.EdictId,
JLField.LocalTime
};
Expand All @@ -46,6 +47,7 @@ public static class JLFieldUtils
JLField.Sentence,
JLField.Frequencies,
JLField.RawFrequencies,
JLField.NumericPitchAccents,
JLField.EdictId,
JLField.LocalTime
};
Expand All @@ -62,6 +64,7 @@ public static class JLFieldUtils
JLField.Image,
JLField.SourceText,
JLField.Sentence,
JLField.NumericPitchAccents,
JLField.EdictId,
JLField.LocalTime
};
Expand Down
88 changes: 88 additions & 0 deletions JL.Core/Mining/MiningUtils.cs
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
using System.Text;
using JL.Core.Audio;
using JL.Core.Dicts;
using JL.Core.Dicts.PitchAccent;
using JL.Core.Lookup;
using JL.Core.Mining.Anki;
using JL.Core.Network;
Expand Down Expand Up @@ -131,9 +132,96 @@ private static Dictionary<JLField, string> GetMiningParameters(LookupResult look
miningParams[JLField.RadicalNames] = string.Join(", ", lookupResult.RadicalNames);
}

if (DictUtils.SingleDictTypeDicts.TryGetValue(DictType.PitchAccentYomichan, out Dict? pitchDict))
{
if (pitchDict.Active)
{
List<KeyValuePair<string, int>>? pitchAccents = GetPitchAccents(lookupResult.PitchAccentDict ?? pitchDict.Contents, lookupResult);
if (pitchAccents is not null)
{
StringBuilder numericPitchAccentBuilder = new();
for (int i = 0; i < pitchAccents.Count; i++)
{
KeyValuePair<string, int> pitchAccent = pitchAccents[i];
_ = numericPitchAccentBuilder.Append(CultureInfo.InvariantCulture, $"{pitchAccent.Key}: {pitchAccent.Value}, ");
}

_ = numericPitchAccentBuilder.Remove(numericPitchAccentBuilder.Length - 2, 2);
miningParams[JLField.NumericPitchAccents] = numericPitchAccentBuilder.ToString();
}
}
}

return miningParams;
}

/// <summary>
/// Collects the pitch accent positions that <paramref name="pitchDict"/> holds for a lookup result.
/// </summary>
/// <param name="pitchDict">Pitch accent records; queried with the hiragana form of a reading or spelling.</param>
/// <param name="lookupResult">The result whose readings (or, lacking those, spellings) are looked up.</param>
/// <returns>
/// Label/position pairs in the order they were found, or <see langword="null"/> when nothing matched.
/// The label is the reading exactly as stored on <paramref name="lookupResult"/>; when the result has
/// no readings the label is always the primary spelling, even for a match found through an alternative spelling.
/// </returns>
private static List<KeyValuePair<string, int>>? GetPitchAccents(Dictionary<string, IList<IDictRecord>> pitchDict, LookupResult lookupResult)
{
    List<KeyValuePair<string, int>> found = new();

    if (lookupResult.Readings is null)
    {
        // Without readings the spelling itself serves as the lookup key.
        if (pitchDict.TryGetValue(JapaneseUtils.KatakanaToHiragana(lookupResult.PrimarySpelling), out IList<IDictRecord>? primaryRecords))
        {
            AddFirstReadinglessRecord(primaryRecords);
        }

        // Alternative spellings are consulted only when the primary spelling has no entry at all.
        else if (lookupResult.AlternativeSpellings is not null)
        {
            foreach (string alternativeSpelling in lookupResult.AlternativeSpellings)
            {
                if (pitchDict.TryGetValue(JapaneseUtils.KatakanaToHiragana(alternativeSpelling), out IList<IDictRecord>? alternativeRecords))
                {
                    AddFirstReadinglessRecord(alternativeRecords);
                }
            }
        }
    }

    else
    {
        foreach (string reading in lookupResult.Readings)
        {
            if (!pitchDict.TryGetValue(JapaneseUtils.KatakanaToHiragana(reading), out IList<IDictRecord>? candidates))
            {
                continue;
            }

            foreach (IDictRecord candidate in candidates)
            {
                PitchAccentRecord candidateRecord = (PitchAccentRecord)candidate;
                string candidateSpelling = candidateRecord.Spelling;

                bool belongsToResult = lookupResult.PrimarySpelling == candidateSpelling
                    || lookupResult.AlternativeSpellings?.Contains(candidateSpelling) is true;

                if (belongsToResult)
                {
                    // Only the first record matching one of the result's spellings is kept per reading.
                    found.Add(KeyValuePair.Create(reading, candidateRecord.Position));
                    break;
                }
            }
        }
    }

    if (found.Count == 0)
    {
        return null;
    }

    return found;

    // Stores the first record that carries no reading, labelled with the primary spelling.
    void AddFirstReadinglessRecord(IList<IDictRecord> records)
    {
        foreach (IDictRecord record in records)
        {
            PitchAccentRecord readinglessCandidate = (PitchAccentRecord)record;
            if (readinglessCandidate.Reading is not null)
            {
                continue;
            }

            found.Add(KeyValuePair.Create(lookupResult.PrimarySpelling, readinglessCandidate.Position));
            return;
        }
    }
}

public static async Task MineToFile(LookupResult lookupResult, string currentText, string? selectedDefinitions, int currentCharPosition)
{
string filePath;
Expand Down
3 changes: 3 additions & 0 deletions JL.Windows/GUI/PreferencesWindow.xaml.cs
Original file line number Diff line number Diff line change
Expand Up @@ -63,6 +63,7 @@ public static bool IsItVisible()
• Deconjugation Process: Deconjugation path from the "Matched Text" to "Deconjugated Matched Text".
• Frequencies: Frequency info for the mined word, e.g., "VN: 77, jpdb: 666".
• Raw Frequencies: Raw frequency info for the mined word, e.g., "77, 666".
• Pitch Accents (Numeric): Pitch accents for the mined word in numeric form, e.g., おんな: 3, おみな: 0, おうな: 1
• EDICT ID: JMDict entry ID.
• Local Time: Mining date and time expressed in local timezone.
""";
Expand All @@ -84,6 +85,7 @@ public static bool IsItVisible()
• Sentence: Sentence in which the mined kanji appears in.
• Frequencies: Frequency info for the kanji, e.g., "KANJIDIC2: 77, jpdb: 666".
• Raw Frequencies: Raw frequency info for the mined word, e.g., "77, 666".
• Pitch Accents (Numeric): Pitch accents for the mined word in numeric form, e.g., おんな: 3, おみな: 0, おうな: 1
• EDICT ID: KANJIDIC2 entry ID.
• Local Time: Mining date and time expressed in local timezone.
""";
Expand All @@ -97,6 +99,7 @@ public static bool IsItVisible()
• Dictionary Name: Name of the dictionary, e.g., "JMnedict".
• Source Text: Whole text in which the mined name appears in.
• Sentence: Sentence in which the mined name appears in.
• Pitch Accents (Numeric): Pitch accents for the mined word in numeric form, e.g., おんな: 3, おみな: 0, おうな: 1
• EDICT ID: JMnedict entry ID.
• Local Time: Mining date and time expressed in local timezone.
""";
Expand Down

0 comments on commit 4749dce

Please sign in to comment.