From 6fb7b0ecfd0e215b8a2cf954d9c45a212cc82cd9 Mon Sep 17 00:00:00 2001 From: precondition <57645186+precondition@users.noreply.github.com> Date: Wed, 28 Aug 2024 15:12:01 +0200 Subject: [PATCH] ankiclient: add four new aggregate frequency markers This commit adds support for 4 new markers: 1. {frequency-harmonic-rank} (Default: 9999999) 2. {frequency-harmonic-occurrence} (Default: 0) 3. {frequency-average-rank} (Default: 9999999) 4. {frequency-average-occurrence} (Default: 0) These markers aggregate the term frequency information of multiple dictionaries into a single numeric value, often used for sorting Anki cards by frequency. --- src/anki/ankiclient.cpp | 98 ++++++++++++ src/anki/ankiclient.h | 31 ++++ src/gui/widgets/settings/ankisettings.cpp | 8 + src/gui/widgets/settings/ankisettingshelp.ui | 160 +++++++++++++++---- 4 files changed, 269 insertions(+), 28 deletions(-) diff --git a/src/anki/ankiclient.cpp b/src/anki/ankiclient.cpp index 5a7d131..ee21e73 100644 --- a/src/anki/ankiclient.cpp +++ b/src/anki/ankiclient.cpp @@ -1344,6 +1344,8 @@ void AnkiClient::buildCommonNote( QString(exp.context2).replace('\n', m_currentConfig->newlineReplacer); QString frequencies = buildFrequencies(exp.frequencies); + const int frequencyHarmonic = getFrequencyHarmonic(exp.frequencies); + const int frequencyAverage = getFrequencyAverage(exp.frequencies); QJsonArray fieldsWithAudioMedia; QJsonArray fieldsWithAudioContext; @@ -1384,6 +1386,25 @@ void AnkiClient::buildCommonNote( value.replace(REPLACE_CLOZE_PREFIX, clozePrefix); value.replace(REPLACE_CLOZE_SUFFIX, clozeSuffix); value.replace(REPLACE_FREQUENCIES, frequencies); + + /* If the term never occurs in the corpus of any loaded frequency + * dictionary, assume it is a very rare word. 
+ * (The higher the ranking, the rarer the term) */ + constexpr int default_freq_rank = 9999999; + constexpr int default_freq_occurrence = 0; + + value.replace(REPLACE_FREQ_HARMONIC_RANK, + positiveIntToQString(frequencyHarmonic, default_freq_rank)); + + value.replace(REPLACE_FREQ_HARMONIC_OCCU, + positiveIntToQString(frequencyHarmonic, default_freq_occurrence)); + + value.replace(REPLACE_FREQ_AVERAGE_RANK, + positiveIntToQString(frequencyAverage, default_freq_rank)); + + value.replace(REPLACE_FREQ_AVERAGE_OCCU, + positiveIntToQString(frequencyAverage, default_freq_occurrence)); + value.replace(REPLACE_SENTENCE, sentence); value.replace(REPLACE_SENTENCE_SEC, sentence2); value.replace(REPLACE_CONTEXT, context); @@ -1765,6 +1786,83 @@ QString AnkiClient::buildFrequencies(const QList &frequencies) return freqStr; } +std::vector AnkiClient::getFrequencyNumbers( + const QList &frequencies) +{ + QString previousDictionary; + std::vector frequencyNumbers; + + /* This regular expression only catches numbers in base 10 and + * would not catch negative or decimal numbers because we make + * the assumption that these special types of numbers will not + * appear in frequency dictionaries. Built once, outside the loop. */ + const QRegularExpression numberPattern("\\d+"); + + for (const Frequency &frequencyEntry : frequencies) + { + if (frequencyEntry.dictionary == previousDictionary + || frequencyEntry.freq.isNull()) + { + continue; + } + previousDictionary = frequencyEntry.dictionary; + + QRegularExpressionMatch match = numberPattern.match(frequencyEntry.freq); + + if (match.hasMatch()) + { + /* Only save the first number to avoid counting secondary frequency + * information (e.g. frequency for the full kana orthography) in the + * aggregate measures to align with Yomitan's behavior. */ + frequencyNumbers.push_back(match.captured(0).toInt()); + } + } + + return frequencyNumbers; +} + +QString AnkiClient::positiveIntToQString(const int value, const int defaultValue) +{ + return (value < 0) ? 
QString::number(defaultValue) : QString::number(value); +} + +int AnkiClient::getFrequencyHarmonic(const QList &frequencies) +{ + const std::vector frequencyNumbers = getFrequencyNumbers(frequencies); + + if (frequencyNumbers.empty()) + { + return -1; + } + + double total = 0.0; + for (int frequencyNum : frequencyNumbers) + { + if (frequencyNum != 0) + { + total += 1.0 / frequencyNum; + } + } + /* Every entry was 0: size / 0.0 is inf and inf -> int is undefined, so report "no data". */ + return (total > 0.0) ? std::floor(frequencyNumbers.size() / total) : -1; +} + +int AnkiClient::getFrequencyAverage(const QList &frequencies) +{ + const std::vector frequencyNumbers = getFrequencyNumbers(frequencies); + + if (frequencyNumbers.empty()) + { + return -1; + } + + /* Sum in double (0.0 seed) so large frequencies cannot overflow int */ + double total = std::accumulate(frequencyNumbers.begin(), + frequencyNumbers.end(), 0.0); + + return std::floor(total / frequencyNumbers.size()); +} + void AnkiClient::buildTags( const QList &tags, QString &tagStr, diff --git a/src/anki/ankiclient.h b/src/anki/ankiclient.h index 75a74dd..c95df5b 100644 --- a/src/anki/ankiclient.h +++ b/src/anki/ankiclient.h @@ -42,6 +42,10 @@ #define REPLACE_CONTEXT "{context}" #define REPLACE_CONTEXT_SEC "{context-2}" #define REPLACE_FREQUENCIES "{frequencies}" +#define REPLACE_FREQ_HARMONIC_RANK "{frequency-harmonic-rank}" +#define REPLACE_FREQ_HARMONIC_OCCU "{frequency-harmonic-occurrence}" +#define REPLACE_FREQ_AVERAGE_RANK "{frequency-average-rank}" +#define REPLACE_FREQ_AVERAGE_OCCU "{frequency-average-occurrence}" #define REPLACE_GLOSSARY "{glossary}" #define REPLACE_SCREENSHOT "{screenshot}" #define REPLACE_SCREENSHOT_VIDEO "{screenshot-video}" @@ -445,6 +449,33 @@ private Q_SLOTS: */ QString buildFrequencies(const QList &freq); + + /** + * Extracts frequency numbers from a list of frequency tags. + * @param frequencies A list of Frequency structs. + * @return A vector of positive integers representing the frequency numbers. 
+ * (Only selecting the first frequency displayed by a dictionary, + * to avoid picking secondary frequencies like kana frequencies) + */ + std::vector getFrequencyNumbers(const QList &freq); + + // Function to convert integer to QString with a specific default value if `value` < 0. + QString positiveIntToQString(const int value, const int defaultValue); + + /** + * Function to calculate the harmonic mean of frequencies. + * @param frequencies A list of Frequency structs. + * @return The harmonic mean as an integer, or -1 if the list is empty. + */ + int getFrequencyHarmonic(const QList &freq); + + /** + * Function to calculate the arithmetic average of frequencies. + * @param frequencies A list of Frequency structs. + * @return The arithmetic average as an integer, or -1 if the list is empty. + */ + int getFrequencyAverage(const QList &freq); + /** * Creates the HTML representation of the pitch, pitch graph, and pitch * position for the given pitches. diff --git a/src/gui/widgets/settings/ankisettings.cpp b/src/gui/widgets/settings/ankisettings.cpp index af40739..fa18dd9 100644 --- a/src/gui/widgets/settings/ankisettings.cpp +++ b/src/gui/widgets/settings/ankisettings.cpp @@ -51,6 +51,10 @@ AnkiSettings::AnkiSettings(QWidget *parent) REPLACE_CONTEXT, REPLACE_CONTEXT_SEC, REPLACE_FREQUENCIES, + REPLACE_FREQ_HARMONIC_RANK, + REPLACE_FREQ_HARMONIC_OCCU, + REPLACE_FREQ_AVERAGE_RANK, + REPLACE_FREQ_AVERAGE_OCCU, REPLACE_GLOSSARY, REPLACE_SCREENSHOT, REPLACE_SCREENSHOT_VIDEO, @@ -85,6 +89,10 @@ AnkiSettings::AnkiSettings(QWidget *parent) REPLACE_CONTEXT, REPLACE_CONTEXT_SEC, REPLACE_FREQUENCIES, + REPLACE_FREQ_HARMONIC_RANK, + REPLACE_FREQ_HARMONIC_OCCU, + REPLACE_FREQ_AVERAGE_RANK, + REPLACE_FREQ_AVERAGE_OCCU, REPLACE_GLOSSARY, REPLACE_SCREENSHOT, REPLACE_SCREENSHOT_VIDEO, diff --git a/src/gui/widgets/settings/ankisettingshelp.ui b/src/gui/widgets/settings/ankisettingshelp.ui index ace660f..7a57f20 100644 --- a/src/gui/widgets/settings/ankisettingshelp.ui +++ 
b/src/gui/widgets/settings/ankisettingshelp.ui @@ -62,21 +62,21 @@ These will expand to larger expressions in the final card. - + Screenshot of the current frame. - + Title of the video. Filename if no title. - + @@ -95,14 +95,14 @@ These will expand to larger expressions in the final card. - + Pitch graphs for the reading of the term. - + @@ -128,7 +128,7 @@ These will expand to larger expressions in the final card. - + @@ -154,7 +154,7 @@ These will expand to larger expressions in the final card. - + The reading of the word in kana. @@ -202,7 +202,7 @@ These will expand to larger expressions in the final card. - + Screenshot of the current frame without subtitles if visible. @@ -228,7 +228,7 @@ These will expand to larger expressions in the final card. - + The current secondary subtitle. @@ -242,7 +242,7 @@ These will expand to larger expressions in the final card. - + @@ -322,14 +322,14 @@ These will expand to larger expressions in the final card. - + Pitch positions for the reading of the term. - + @@ -348,7 +348,7 @@ These will expand to larger expressions in the final card. - + The dictionary definitions of the expression. @@ -413,14 +413,14 @@ These will expand to larger expressions in the final card. - + The dictionary definitions of the expression without dictionary information. - + @@ -439,7 +439,7 @@ These will expand to larger expressions in the final card. - + @@ -458,7 +458,7 @@ These will expand to larger expressions in the final card. - + Bulleted list of the term tags. @@ -536,7 +536,111 @@ These will expand to larger expressions in the final card. - + + + + + 75 + true + + + + {frequency-harmonic-rank} + + + Qt::AlignCenter + + + Qt::TextSelectableByKeyboard|Qt::TextSelectableByMouse + + + + + + + The harmonic mean of frequency data for the current term. Defaults to rank 9999999 when frequency data is not found. 
+ + + + + + + + 75 + true + + + + {frequency-harmonic-occurrence} + + + Qt::AlignCenter + + + Qt::TextSelectableByKeyboard|Qt::TextSelectableByMouse + + + + + + + The harmonic mean of frequency data for the current term. Defaults to 0 occurrences when frequency data is not found. + + + + + + + + 75 + true + + + + {frequency-average-rank} + + + Qt::AlignCenter + + + Qt::TextSelectableByKeyboard|Qt::TextSelectableByMouse + + + + + + + The average of frequency data for the current term. Defaults to rank 9999999 when frequency data is not found. + + + + + + + + 75 + true + + + + {frequency-average-occurrence} + + + Qt::AlignCenter + + + Qt::TextSelectableByKeyboard|Qt::TextSelectableByMouse + + + + + + + The average of frequency data for the current term. Defaults to 0 occurrences when frequency data is not found. + + + + @@ -555,7 +659,7 @@ These will expand to larger expressions in the final card. - + @@ -594,7 +698,7 @@ These will expand to larger expressions in the final card. - + @@ -613,14 +717,14 @@ These will expand to larger expressions in the final card. - + Pitch accents for the reading of the term. - + The current subtitle. @@ -669,7 +773,7 @@ These will expand to larger expressions in the final card. - + @@ -688,7 +792,7 @@ These will expand to larger expressions in the final card. - + @@ -707,14 +811,14 @@ These will expand to larger expressions in the final card. - + Bulleted list of the term tags without description. - + @@ -752,7 +856,7 @@ These will expand to larger expressions in the final card. - + @@ -771,7 +875,7 @@ These will expand to larger expressions in the final card. - + The dictionary definitions of the expression in a compact form.