Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

ankiclient: add four new aggregate frequency markers #238

Merged
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
98 changes: 98 additions & 0 deletions src/anki/ankiclient.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1344,6 +1344,8 @@ void AnkiClient::buildCommonNote(
QString(exp.context2).replace('\n', m_currentConfig->newlineReplacer);

QString frequencies = buildFrequencies(exp.frequencies);
const int frequencyHarmonic = getFrequencyHarmonic(exp.frequencies);
const int frequencyAverage = getFrequencyAverage(exp.frequencies);

QJsonArray fieldsWithAudioMedia;
QJsonArray fieldsWithAudioContext;
Expand Down Expand Up @@ -1384,6 +1386,25 @@ void AnkiClient::buildCommonNote(
value.replace(REPLACE_CLOZE_PREFIX, clozePrefix);
value.replace(REPLACE_CLOZE_SUFFIX, clozeSuffix);
value.replace(REPLACE_FREQUENCIES, frequencies);

/* If the term never occurs in the corpus of any loaded frequency
* dictionary, assume it is a very rare word.
* (The higher the ranking, the rarer the term) */
constexpr int default_freq_rank = 9999999;
constexpr int default_freq_occurrence = 0;

value.replace(REPLACE_FREQ_HARMONIC_RANK,
positiveIntToQString(frequencyHarmonic, default_freq_rank));

value.replace(REPLACE_FREQ_HARMONIC_OCCU,
positiveIntToQString(frequencyHarmonic, default_freq_occurrence));

value.replace(REPLACE_FREQ_AVERAGE_RANK,
positiveIntToQString(frequencyAverage, default_freq_rank));

value.replace(REPLACE_FREQ_AVERAGE_OCCU,
positiveIntToQString(frequencyAverage, default_freq_occurrence));

value.replace(REPLACE_SENTENCE, sentence);
value.replace(REPLACE_SENTENCE_SEC, sentence2);
value.replace(REPLACE_CONTEXT, context);
Expand Down Expand Up @@ -1765,6 +1786,83 @@ QString AnkiClient::buildFrequencies(const QList<Frequency> &frequencies)
return freqStr;
}

std::vector<int> AnkiClient::getFrequencyNumbers(
const QList<Frequency> &frequencies)
{
QString previousDictionary;
std::vector<int> frequencyNumbers;

for (const Frequency &frequencyEntry : frequencies)
{
if (frequencyEntry.dictionary == previousDictionary
|| frequencyEntry.freq.isNull())
{
continue;
}
previousDictionary = frequencyEntry.dictionary;

/* This regular expression only catches numbers in base 10 and
* would not catch negative or decimal numbers because we make
* the assumption that these special types of numbers will not
* appear in frequency dictionaries. */
QRegularExpression numberPattern("\\d+");
QRegularExpressionMatch match = numberPattern.match(frequencyEntry.freq);

if (match.hasMatch())
{
/* Only save the first number to avoid counting secondary frequency
* information (e.g. frequency for the full kana orthography) in the
* aggregate measures to align with Yomitan's behavior. */
frequencyNumbers.push_back(match.captured(0).toInt());
continue;
}
}

return frequencyNumbers;
}

/**
 * Formats an integer as a decimal string, falling back to a default.
 * @param value        The number to format.
 * @param defaultValue The number to format instead when value is negative.
 * @return The decimal representation of value if value >= 0, otherwise the
 *         decimal representation of defaultValue.
 */
QString AnkiClient::positiveIntToQString(const int value, const int defaultValue)
{
    /* Negative values act as a "no data" sentinel; zero is passed through. */
    const int shown = (value < 0) ? defaultValue : value;
    return QString::number(shown);
}

/**
 * Calculates the harmonic mean of the frequency numbers extracted by
 * getFrequencyNumbers(), rounded down.
 *
 * Zero entries have no reciprocal, so they are excluded from both the
 * reciprocal sum and the element count.
 *
 * @param frequencies A list of Frequency structs.
 * @return The floored harmonic mean of the non-zero frequency numbers, or
 *         -1 if there are none.
 */
int AnkiClient::getFrequencyHarmonic(const QList<Frequency> &frequencies)
{
    const std::vector<int> frequencyNumbers = getFrequencyNumbers(frequencies);

    double reciprocalSum = 0.0;
    int contributing = 0;
    for (const int frequencyNum : frequencyNumbers)
    {
        /* getFrequencyNumbers() only captures unsigned digit runs, so the
         * only value that has to be filtered out here is 0. */
        if (frequencyNum > 0)
        {
            reciprocalSum += 1.0 / frequencyNum;
            ++contributing;
        }
    }

    /* Covers both an empty list and a list containing only zeros. Without
     * this guard the division below would yield infinity, and converting
     * infinity to int is undefined behavior. */
    if (contributing == 0)
    {
        return -1;
    }

    /* The harmonic mean never exceeds the largest input, so it fits in an
     * int. */
    return static_cast<int>(std::floor(contributing / reciprocalSum));
}

/**
 * Calculates the arithmetic mean of the frequency numbers extracted by
 * getFrequencyNumbers(), rounded down.
 * @param frequencies A list of Frequency structs.
 * @return The floored arithmetic mean, or -1 if no frequency numbers were
 *         extracted.
 */
int AnkiClient::getFrequencyAverage(const QList<Frequency> &frequencies)
{
    const std::vector<int> frequencyNumbers = getFrequencyNumbers(frequencies);

    if (frequencyNumbers.empty())
    {
        return -1;
    }

    /* Sum the elements in the vector. The initial value must be a double:
     * std::accumulate sums in the type of its initial value, and an int
     * accumulator could overflow when several large numbers are added. */
    const double total = std::accumulate(
        frequencyNumbers.begin(), frequencyNumbers.end(), 0.0);

    /* The mean never exceeds the largest input, so it fits in an int. */
    return static_cast<int>(std::floor(total / frequencyNumbers.size()));
}

void AnkiClient::buildTags(
const QList<Tag> &tags,
QString &tagStr,
Expand Down
31 changes: 31 additions & 0 deletions src/anki/ankiclient.h
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,10 @@
#define REPLACE_CONTEXT "{context}"
#define REPLACE_CONTEXT_SEC "{context-2}"
#define REPLACE_FREQUENCIES "{frequencies}"
#define REPLACE_FREQ_HARMONIC_RANK "{frequency-harmonic-rank}"
#define REPLACE_FREQ_HARMONIC_OCCU "{frequency-harmonic-occurrence}"
#define REPLACE_FREQ_AVERAGE_RANK "{frequency-average-rank}"
#define REPLACE_FREQ_AVERAGE_OCCU "{frequency-average-occurrence}"
#define REPLACE_GLOSSARY "{glossary}"
#define REPLACE_SCREENSHOT "{screenshot}"
#define REPLACE_SCREENSHOT_VIDEO "{screenshot-video}"
Expand Down Expand Up @@ -445,6 +449,33 @@ private Q_SLOTS:
*/
QString buildFrequencies(const QList<Frequency> &freq);


/**
 * Extracts frequency numbers from a list of frequencies.
 * @param freq A list of Frequency structs.
 * @return A vector of non-negative integers representing the frequency
 *         numbers. Only the first number displayed by a dictionary is
 *         selected, to avoid picking secondary frequencies like kana
 *         frequencies.
 */
std::vector<int> getFrequencyNumbers(const QList<Frequency> &freq);

/**
 * Converts an integer to a QString, substituting a default for negative
 * values.
 * @param value        The integer to convert.
 * @param defaultValue The integer to convert instead if value < 0.
 * @return The decimal string of value, or of defaultValue if value < 0.
 */
QString positiveIntToQString(const int value, const int defaultValue);

/**
 * Calculates the harmonic mean of the frequency numbers found in a list of
 * frequencies.
 * @param freq A list of Frequency structs.
 * @return The harmonic mean rounded down to an integer, or -1 if no
 *         frequency number could be extracted from the list.
 */
int getFrequencyHarmonic(const QList<Frequency> &freq);

/**
 * Calculates the arithmetic average of the frequency numbers found in a
 * list of frequencies.
 * @param freq A list of Frequency structs.
 * @return The arithmetic average rounded down to an integer, or -1 if no
 *         frequency number could be extracted from the list.
 */
int getFrequencyAverage(const QList<Frequency> &freq);

/**
* Creates the HTML representation of the pitch, pitch graph, and pitch
* position for the given pitches.
Expand Down
8 changes: 8 additions & 0 deletions src/gui/widgets/settings/ankisettings.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -51,6 +51,10 @@ AnkiSettings::AnkiSettings(QWidget *parent)
REPLACE_CONTEXT,
REPLACE_CONTEXT_SEC,
REPLACE_FREQUENCIES,
REPLACE_FREQ_HARMONIC_RANK,
REPLACE_FREQ_HARMONIC_OCCU,
REPLACE_FREQ_AVERAGE_RANK,
REPLACE_FREQ_AVERAGE_OCCU,
REPLACE_GLOSSARY,
REPLACE_SCREENSHOT,
REPLACE_SCREENSHOT_VIDEO,
Expand Down Expand Up @@ -85,6 +89,10 @@ AnkiSettings::AnkiSettings(QWidget *parent)
REPLACE_CONTEXT,
REPLACE_CONTEXT_SEC,
REPLACE_FREQUENCIES,
REPLACE_FREQ_HARMONIC_RANK,
REPLACE_FREQ_HARMONIC_OCCU,
REPLACE_FREQ_AVERAGE_RANK,
REPLACE_FREQ_AVERAGE_OCCU,
REPLACE_GLOSSARY,
REPLACE_SCREENSHOT,
REPLACE_SCREENSHOT_VIDEO,
Expand Down
Loading
Loading