Skip to content

Commit

Permalink
ankiclient: add four new aggregate frequency markers
Browse files Browse the repository at this point in the history
This commit adds support for 4 new markers:
1. {frequency-harmonic-rank} (Default: 9999999)
2. {frequency-harmonic-occurrence} (Default: 0)
3. {frequency-average-rank} (Default: 9999999)
4. {frequency-average-occurrence} (Default: 0)

These markers aggregate the term frequency information of multiple
dictionaries into a single numeric value, often used for sorting Anki
cards by frequency.
  • Loading branch information
precondition committed Aug 30, 2024
1 parent 3f5e599 commit 6fb7b0e
Show file tree
Hide file tree
Showing 4 changed files with 269 additions and 28 deletions.
98 changes: 98 additions & 0 deletions src/anki/ankiclient.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1344,6 +1344,8 @@ void AnkiClient::buildCommonNote(
QString(exp.context2).replace('\n', m_currentConfig->newlineReplacer);

QString frequencies = buildFrequencies(exp.frequencies);
const int frequencyHarmonic = getFrequencyHarmonic(exp.frequencies);
const int frequencyAverage = getFrequencyAverage(exp.frequencies);

QJsonArray fieldsWithAudioMedia;
QJsonArray fieldsWithAudioContext;
Expand Down Expand Up @@ -1384,6 +1386,25 @@ void AnkiClient::buildCommonNote(
value.replace(REPLACE_CLOZE_PREFIX, clozePrefix);
value.replace(REPLACE_CLOZE_SUFFIX, clozeSuffix);
value.replace(REPLACE_FREQUENCIES, frequencies);

/* If the term never occurs in the corpus of any loaded frequency
* dictionary, assume it is a very rare word.
* (The higher the ranking, the rarer the term) */
constexpr int default_freq_rank = 9999999;
constexpr int default_freq_occurrence = 0;

value.replace(REPLACE_FREQ_HARMONIC_RANK,
positiveIntToQString(frequencyHarmonic, default_freq_rank));

value.replace(REPLACE_FREQ_HARMONIC_OCCU,
positiveIntToQString(frequencyHarmonic, default_freq_occurrence));

value.replace(REPLACE_FREQ_AVERAGE_RANK,
positiveIntToQString(frequencyAverage, default_freq_rank));

value.replace(REPLACE_FREQ_AVERAGE_OCCU,
positiveIntToQString(frequencyAverage, default_freq_occurrence));

value.replace(REPLACE_SENTENCE, sentence);
value.replace(REPLACE_SENTENCE_SEC, sentence2);
value.replace(REPLACE_CONTEXT, context);
Expand Down Expand Up @@ -1765,6 +1786,83 @@ QString AnkiClient::buildFrequencies(const QList<Frequency> &frequencies)
return freqStr;
}

std::vector<int> AnkiClient::getFrequencyNumbers(
const QList<Frequency> &frequencies)
{
QString previousDictionary;
std::vector<int> frequencyNumbers;

for (const Frequency &frequencyEntry : frequencies)
{
if (frequencyEntry.dictionary == previousDictionary
|| frequencyEntry.freq.isNull())
{
continue;
}
previousDictionary = frequencyEntry.dictionary;

/* This regular expression only catches numbers in base 10 and
* would not catch negative or decimal numbers because we make
* the assumption that these special types of numbers will not
* appear in frequency dictionaries. */
QRegularExpression numberPattern("\\d+");
QRegularExpressionMatch match = numberPattern.match(frequencyEntry.freq);

if (match.hasMatch())
{
/* Only save the first number to avoid counting secondary frequency
* information (e.g. frequency for the full kana orthography) in the
* aggregate measures to align with Yomitan's behavior. */
frequencyNumbers.push_back(match.captured(0).toInt());
continue;
}
}

return frequencyNumbers;
}

/**
 * Renders an integer as a decimal string, substituting a fallback for
 * negative inputs.
 *
 * @param value        The number to render; zero and above is used as-is.
 * @param defaultValue The number rendered instead when value is negative.
 * @return The decimal string of whichever number was selected.
 */
QString AnkiClient::positiveIntToQString(const int value, const int defaultValue)
{
    const int shown = (value >= 0) ? value : defaultValue;
    return QString::number(shown);
}

/**
 * Computes the harmonic mean of the frequency numbers extracted by
 * getFrequencyNumbers(), rounded down to an integer.
 *
 * Zero-valued frequencies add nothing to the reciprocal sum but are still
 * counted in the numerator (the number of extracted values).
 *
 * @param frequencies The frequency entries to aggregate.
 * @return The floored harmonic mean, or -1 when no frequency number was
 *         extracted or every extracted number is 0.
 */
int AnkiClient::getFrequencyHarmonic(const QList<Frequency> &frequencies)
{
    const std::vector<int> frequencyNumbers = getFrequencyNumbers(frequencies);

    if (frequencyNumbers.empty())
    {
        return -1;
    }

    double total = 0.0;
    for (const int frequencyNum : frequencyNumbers)
    {
        /* Skip zeros so that we never divide by zero. */
        if (frequencyNum != 0)
        {
            total += 1.0 / frequencyNum;
        }
    }

    /* If every value was 0 the reciprocal sum is still 0; dividing by it
     * would give infinity, and converting infinity to int is undefined
     * behavior. Report "no usable data" instead. */
    if (total <= 0.0)
    {
        return -1;
    }

    return static_cast<int>(std::floor(frequencyNumbers.size() / total));
}

/**
 * Computes the arithmetic mean of the frequency numbers extracted by
 * getFrequencyNumbers(), rounded down to an integer.
 *
 * @param frequencies The frequency entries to aggregate.
 * @return The floored arithmetic mean, or -1 when no frequency number was
 *         extracted.
 */
int AnkiClient::getFrequencyAverage(const QList<Frequency> &frequencies)
{
    const std::vector<int> frequencyNumbers = getFrequencyNumbers(frequencies);

    if (frequencyNumbers.empty())
    {
        return -1;
    }

    /* Sum the elements in the vector. The initial value must be a double:
     * std::accumulate sums in the type of its initial value, and an int
     * initial value could overflow when adding several large frequencies. */
    const double total = std::accumulate(
        frequencyNumbers.begin(), frequencyNumbers.end(), 0.0);

    return static_cast<int>(std::floor(total / frequencyNumbers.size()));
}

void AnkiClient::buildTags(
const QList<Tag> &tags,
QString &tagStr,
Expand Down
31 changes: 31 additions & 0 deletions src/anki/ankiclient.h
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,10 @@
#define REPLACE_CONTEXT "{context}"
#define REPLACE_CONTEXT_SEC "{context-2}"
#define REPLACE_FREQUENCIES "{frequencies}"
#define REPLACE_FREQ_HARMONIC_RANK "{frequency-harmonic-rank}"
#define REPLACE_FREQ_HARMONIC_OCCU "{frequency-harmonic-occurrence}"
#define REPLACE_FREQ_AVERAGE_RANK "{frequency-average-rank}"
#define REPLACE_FREQ_AVERAGE_OCCU "{frequency-average-occurrence}"
#define REPLACE_GLOSSARY "{glossary}"
#define REPLACE_SCREENSHOT "{screenshot}"
#define REPLACE_SCREENSHOT_VIDEO "{screenshot-video}"
Expand Down Expand Up @@ -445,6 +449,33 @@ private Q_SLOTS:
*/
QString buildFrequencies(const QList<Frequency> &freq);


/**
 * Extracts frequency numbers from a list of frequency tags.
 * @param freq A list of Frequency structs.
 * @return A vector of non-negative integers representing the frequency
 *         numbers. Only the first base-10 number of the first usable entry
 *         in each run of consecutive same-dictionary entries is taken, to
 *         avoid picking secondary frequencies like kana frequencies.
 */
std::vector<int> getFrequencyNumbers(const QList<Frequency> &freq);

/**
 * Converts an integer to a QString, falling back to a default when the
 * integer is negative.
 * @param value        The integer to convert. Zero is converted as-is.
 * @param defaultValue The integer converted instead when value < 0.
 * @return The decimal string of value, or of defaultValue if value < 0.
 */
QString positiveIntToQString(const int value, const int defaultValue);

/**
 * Function to calculate the harmonic mean of frequencies.
 * @param freq A list of Frequency structs.
 * @return The harmonic mean rounded down to an integer, or -1 if no
 *         frequency number could be extracted from the list.
 */
int getFrequencyHarmonic(const QList<Frequency> &freq);

/**
 * Function to calculate the arithmetic average of frequencies.
 * @param freq A list of Frequency structs.
 * @return The arithmetic average rounded down to an integer, or -1 if no
 *         frequency number could be extracted from the list.
 */
int getFrequencyAverage(const QList<Frequency> &freq);

/**
* Creates the HTML representation of the pitch, pitch graph, and pitch
* position for the given pitches.
Expand Down
8 changes: 8 additions & 0 deletions src/gui/widgets/settings/ankisettings.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -51,6 +51,10 @@ AnkiSettings::AnkiSettings(QWidget *parent)
REPLACE_CONTEXT,
REPLACE_CONTEXT_SEC,
REPLACE_FREQUENCIES,
REPLACE_FREQ_HARMONIC_RANK,
REPLACE_FREQ_HARMONIC_OCCU,
REPLACE_FREQ_AVERAGE_RANK,
REPLACE_FREQ_AVERAGE_OCCU,
REPLACE_GLOSSARY,
REPLACE_SCREENSHOT,
REPLACE_SCREENSHOT_VIDEO,
Expand Down Expand Up @@ -85,6 +89,10 @@ AnkiSettings::AnkiSettings(QWidget *parent)
REPLACE_CONTEXT,
REPLACE_CONTEXT_SEC,
REPLACE_FREQUENCIES,
REPLACE_FREQ_HARMONIC_RANK,
REPLACE_FREQ_HARMONIC_OCCU,
REPLACE_FREQ_AVERAGE_RANK,
REPLACE_FREQ_AVERAGE_OCCU,
REPLACE_GLOSSARY,
REPLACE_SCREENSHOT,
REPLACE_SCREENSHOT_VIDEO,
Expand Down
Loading

0 comments on commit 6fb7b0e

Please sign in to comment.