diff --git a/src/dict/CMakeLists.txt b/src/dict/CMakeLists.txt index c4fcfb6..c39555d 100644 --- a/src/dict/CMakeLists.txt +++ b/src/dict/CMakeLists.txt @@ -1,3 +1,47 @@ +add_library(searchquery INTERFACE searchquery.h) +target_compile_features(searchquery INTERFACE cxx_std_17) +target_compile_options(searchquery INTERFACE ${MEMENTO_COMPILER_FLAGS}) +target_include_directories(searchquery INTERFACE ${MEMENTO_INCLUDE_DIRS}) +target_link_libraries(searchquery INTERFACE Qt6::Core) + +add_library(querygenerator INTERFACE querygenerator.h) +target_compile_features(querygenerator INTERFACE cxx_std_17) +target_compile_options(querygenerator INTERFACE ${MEMENTO_COMPILER_FLAGS}) +target_include_directories(querygenerator INTERFACE ${MEMENTO_INCLUDE_DIRS}) +target_link_libraries( + querygenerator + INTERFACE Qt6::Core + INTERFACE searchquery +) + +add_library( + exactquerygenerator STATIC + exactquerygenerator.cpp + exactquerygenerator.h +) +target_compile_features(exactquerygenerator PRIVATE cxx_std_17) +target_compile_options(exactquerygenerator PRIVATE ${MEMENTO_COMPILER_FLAGS}) +target_include_directories(exactquerygenerator PRIVATE ${MEMENTO_INCLUDE_DIRS}) +target_link_libraries( + exactquerygenerator + PUBLIC querygenerator +) + +add_library( + mecabquerygenerator STATIC + mecabquerygenerator.cpp + mecabquerygenerator.h +) +target_compile_features(mecabquerygenerator PRIVATE cxx_std_17) +target_compile_options(mecabquerygenerator PRIVATE ${MEMENTO_COMPILER_FLAGS}) +target_include_directories(mecabquerygenerator PRIVATE ${MEMENTO_INCLUDE_DIRS}) +target_link_libraries( + mecabquerygenerator + PRIVATE utils + PUBLIC MeCab::MeCab + PUBLIC querygenerator +) + add_library( yomidbbuilder STATIC yomidbbuilder.c @@ -34,8 +78,10 @@ target_compile_options(dictionary_db PRIVATE ${MEMENTO_COMPILER_FLAGS}) target_include_directories(dictionary_db PRIVATE ${MEMENTO_INCLUDE_DIRS}) target_link_libraries( dictionary_db - PRIVATE MeCab::MeCab + PRIVATE exactquerygenerator + PRIVATE 
mecabquerygenerator PRIVATE Qt6::Widgets + PRIVATE querygenerator PRIVATE SQLite::SQLite3 PRIVATE yomidbbuilder PUBLIC Qt6::Core diff --git a/src/dict/databasemanager.cpp b/src/dict/databasemanager.cpp index b01923f..707ce23 100644 --- a/src/dict/databasemanager.cpp +++ b/src/dict/databasemanager.cpp @@ -221,7 +221,7 @@ int DatabaseManager::disableDictionaries(const QStringList &dicts) #define QUERY "SELECT title FROM directory;" -QStringList DatabaseManager::getDictionaries() +QStringList DatabaseManager::getDictionaries() const { m_dbLock.lockForRead(); @@ -249,7 +249,7 @@ QStringList DatabaseManager::getDictionaries() #define QUERY "SELECT dic_id FROM dict_disabled;" -QStringList DatabaseManager::getDisabledDictionaries() +QStringList DatabaseManager::getDisabledDictionaries() const { m_dbLock.lockForRead(); @@ -302,7 +302,7 @@ QStringList DatabaseManager::getDisabledDictionaries() #define COLUMN_EXPRESSION 0 #define COLUMN_READING 1 -QString DatabaseManager::queryTerms(const QString &query, QList &terms) +QString DatabaseManager::queryTerms(const QString &query, QList &terms) const { if (m_db == nullptr) { @@ -439,7 +439,7 @@ QString DatabaseManager::queryTerms(const QString &query, QList &ter #define TAG_NAME_CODE "code" #define TAG_NAME_INDEX "index" -QString DatabaseManager::queryKanji(const QString &query, Kanji &kanji) +QString DatabaseManager::queryKanji(const QString &query, Kanji &kanji) const { if (m_db == nullptr) { @@ -497,21 +497,21 @@ QString DatabaseManager::queryKanji(const QString &query, Kanji &kanji) it != map.constKeyValueEnd(); ++it) { - const Tag *tag = &m_tagCache[id][it->first]; + Tag tag = m_tagCache[id][it->first]; QList> *list = nullptr; - if (tag->category == TAG_NAME_INDEX) + if (tag.category == TAG_NAME_INDEX) { list = &def.index; } - else if (tag->category == TAG_NAME_STATS) + else if (tag.category == TAG_NAME_STATS) { list = &def.stats; } - else if (tag->category == TAG_NAME_CLAS) + else if (tag.category == TAG_NAME_CLAS) { list = 
&def.clas; } - else if (tag->category == TAG_NAME_CODE) + else if (tag.category == TAG_NAME_CODE) { list = &def.code; } @@ -519,7 +519,9 @@ QString DatabaseManager::queryKanji(const QString &query, Kanji &kanji) { continue; } - list->append(QPair(*tag, it->second.toString())); + list->append( + QPair(std::move(tag), it->second.toString()) + ); } kanji.definitions.append(def); diff --git a/src/dict/databasemanager.h b/src/dict/databasemanager.h index 360adba..4e552a7 100644 --- a/src/dict/databasemanager.h +++ b/src/dict/databasemanager.h @@ -78,13 +78,13 @@ class DatabaseManager * Gets a list of dictionary names in the database in arbitrary order. * @return A list of dictionary names. */ - QStringList getDictionaries(); + QStringList getDictionaries() const; /** * Gets the list of disabled dictionaries. * @return The names of all disabled dictionaries. */ - QStringList getDisabledDictionaries(); + QStringList getDisabledDictionaries() const; /** * Searches for terms that exactly match the query. Does automatic @@ -93,7 +93,7 @@ class DatabaseManager * @param[out] terms A list of matching terms. Belongs to the caller. * @return Empty string on success, error string on error. */ - QString queryTerms(const QString &query, QList &terms); + QString queryTerms(const QString &query, QList &terms) const; /** * Searches for kanji that exactly match the query. @@ -101,7 +101,7 @@ class DatabaseManager * @param[out] kanji The Kanji struct to populate. * @return Empty string on success, error string on error. */ - QString queryKanji(const QString &query, Kanji &kanji); + QString queryKanji(const QString &query, Kanji &kanji) const; private: /** @@ -203,7 +203,7 @@ class DatabaseManager sqlite3 *m_db; /* Locks the database for reading and writing. */ - QReadWriteLock m_dbLock; + mutable QReadWriteLock m_dbLock; /* Saved path to the database. 
*/ const QByteArray m_dbpath; diff --git a/src/dict/dictionary.cpp b/src/dict/dictionary.cpp index d52a906..343600f 100644 --- a/src/dict/dictionary.cpp +++ b/src/dict/dictionary.cpp @@ -21,116 +21,62 @@ #include "dictionary.h" #include -#include + #include #include #include #include #include "databasemanager.h" +#include "exactquerygenerator.h" +#include "mecabquerygenerator.h" #include "util/constants.h" #include "util/globalmediator.h" #include "util/utils.h" -/** - * A pair to search for. The deconjugated string is used for querying the - * database and the surface string is used for cloze generation. - */ -struct SearchPair +/* Begin Constructor/Destructor */ + +Dictionary::Dictionary(QObject *parent) : QObject(parent) { - /* The deconjugated string */ - QString deconj; + m_db = std::make_unique(DirectoryUtils::getDictionaryDB()); - /* The raw conjugated string */ - QString surface; -}; + initDictionaryOrder(); + initQueryGenerators(); -/** - * A special SearchPair that contains additional information needed by MeCab. - */ -struct MeCabPair : public SearchPair -{ - /* The surface string without whitespace */ - QString surfaceClean; -}; + GlobalMediator *med = GlobalMediator::getGlobalMediator(); + med->setDictionary(this); + connect( + med, &GlobalMediator::dictionaryOrderChanged, + this, &Dictionary::initDictionaryOrder + ); +} -/* Begin Static Helpers */ -#if defined(Q_OS_WIN) -/** - * This whole section is necessary on Windows because MeCab has a bug that - * prevents it from loading dictionaries if there are spaces in the path on - * Windows. If Memento is to be install in "Program Files", this quickly - * becomes an issue. This workaround turns all long paths into space-less - * short paths. - */ - -#include - -/** - * Takes a Windows long path and returns an 8.3/short path. - * @param path The Window long path to convert. - * @return A Windows short path, or the empty string on error. 
- */ -static QByteArray toWindowsShortPath(const QString &path) +void Dictionary::initDictionaryOrder() { - QByteArray pathArr = path.toUtf8(); - DWORD length = 0; - - length = GetShortPathNameA(pathArr.constData(), NULL, 0); - if (length == 0) - { - return ""; - } + m_dicOrder.lock.lockForWrite(); - QByteArray buf(length, '\0'); - length = GetShortPathNameA(pathArr, buf.data(), length); - if (length == 0) + QSettings settings; + settings.beginGroup(Constants::Settings::Dictionaries::GROUP); + QStringList dicts = m_db->getDictionaries(); + m_dicOrder.map.clear(); + for (const QString &dict : dicts) { - return ""; + m_dicOrder.map[dict] = settings.value(dict).toInt(); } - buf.chop(1); - return buf; -} + settings.endGroup(); -/** - * Generates the MeCab argument on Windows. - * @return An argument to pass MeCab so it uses the install's ipadic. - */ -static QByteArray genMecabArg() -{ - QByteArray arg = "-r "; - arg += toWindowsShortPath( - DirectoryUtils::getDictionaryDir() + "ipadic" + SLASH + "dicrc" - ); - arg += " -d "; - arg += toWindowsShortPath(DirectoryUtils::getDictionaryDir() + "ipadic"); - return arg; + m_dicOrder.lock.unlock(); } -#endif -/* End Static Helpers */ -/* Begin Constructor/Destructor */ - -#if defined(Q_OS_WIN) -#define MECAB_ARG (genMecabArg()) -#elif defined(APPIMAGE) || defined(APPBUNDLE) -#define MECAB_ARG ( \ - "-r " + DirectoryUtils::getDictionaryDir() + "ipadic" + SLASH + "dicrc " \ - "-d " + DirectoryUtils::getDictionaryDir() + "ipadic" \ -).toUtf8() -#else -#define MECAB_ARG ("") -#endif - -Dictionary::Dictionary(QObject *parent) : QObject(parent) +void Dictionary::initQueryGenerators() { - m_db = new DatabaseManager(DirectoryUtils::getDictionaryDB()); - - QByteArray mecabArg = MECAB_ARG; - m_tagger = MeCab::createTagger(mecabArg); - if (m_tagger == nullptr) + m_generators.emplace_back(std::make_unique()); + m_generators.emplace_back(std::make_unique()); + if (!m_generators.back()->valid()) { + m_generators.pop_back(); + qDebug() << 
MeCab::getTaggerError(); QMessageBox::critical( nullptr, @@ -154,96 +100,61 @@ Dictionary::Dictionary(QObject *parent) : QObject(parent) #endif ); } - - initDictionaryOrder(); - - GlobalMediator *med = GlobalMediator::getGlobalMediator(); - med->setDictionary(this); - connect( - med, &GlobalMediator::dictionaryOrderChanged, - this, &Dictionary::initDictionaryOrder - ); -} - -#undef MECAB_ARG - -void Dictionary::initDictionaryOrder() -{ - m_dicOrder.lock.lockForWrite(); - - QSettings settings; - settings.beginGroup(Constants::Settings::Dictionaries::GROUP); - QStringList dicts = m_db->getDictionaries(); - m_dicOrder.map.clear(); - for (const QString &dict : dicts) - { - m_dicOrder.map[dict] = settings.value(dict).toInt(); - } - settings.endGroup(); - - m_dicOrder.lock.unlock(); } Dictionary::~Dictionary() { - delete m_db; - delete m_tagger; + } /* End Constructor/Destructor */ /* Begin Term Searching Methods */ -void Dictionary::ExactWorker::run() +SharedTermList Dictionary::searchTerms( + const QString query, + const QString subtitle, + const int index, + const int *currentIndex) { - while (query.size() > endSize && index == *currentIndex) + std::vector queries = generateQueries(query); + if (index != *currentIndex) { - QList results; - db.queryTerms(query, results); + return nullptr; + } - /* Generate cloze data in entries */ - QString clozePrefix; - QString clozeBody; - QString clozeSuffix; - if (!results.isEmpty()) - { - clozePrefix = subtitle.left(index); - clozeBody = subtitle.mid(index, query.size()); - clozeSuffix = subtitle.right( - subtitle.size() - (index + query.size()) - ); - } + sortQueries(queries); + filterDuplicates(queries); + if (index != *currentIndex) + { + return nullptr; + } - for (SharedTerm term : results) + /* Query the database */ + SharedTermList terms = SharedTermList(new QList); + for (const SearchQuery &query : queries) + { + if (index != *currentIndex) { - term->sentence = subtitle; - term->clozePrefix = clozePrefix; - term->clozeBody 
= clozeBody; - term->clozeSuffix = clozeSuffix; + return nullptr; } - terms.append(results); - query.chop(1); - } -} - -void Dictionary::MeCabWorker::run() -{ - while (begin != end && index == *currentIndex) - { - const SearchPair &pair = *begin; QList results; - db.queryTerms(pair.deconj, results); + QString err = m_db->queryTerms(query.deconj, results); + if (!err.isEmpty()) + { + qDebug() << err; + return nullptr; + } - /* Generate cloze data in entries */ QString clozePrefix; QString clozeBody; QString clozeSuffix; if (!results.isEmpty()) { clozePrefix = subtitle.left(index); - clozeBody = subtitle.mid(index, pair.surface.size()); + clozeBody = subtitle.mid(index, query.surface.size()); clozeSuffix = subtitle.right( - subtitle.size() - (index + pair.surface.size()) + subtitle.size() - (index + query.surface.size()) ); } @@ -255,203 +166,69 @@ void Dictionary::MeCabWorker::run() term->clozeSuffix = clozeSuffix; } - terms.append(results); - ++begin; + terms->append(std::move(results)); } -} - -#define WORD_INDEX 6 - -/** - * Gets the deconjugated word from a MeCab node. - * @param node The node to get the deconjugation from. - * @return The deconjugated word, * if there was an error. - */ -static inline QString extractDeconjugation(const MeCab::Node *node) -{ - return QString::fromUtf8(node->feature).split(',')[WORD_INDEX]; -} -#undef WORD_INDEX + sortTerms(terms); + if (index != *currentIndex) + { + return nullptr; + } -/** - * Gets the surface string including whitespace from a MeCab node. - * @param node The MeCab node to get the surface from. - * @return The surface string including whitespace. - */ -static inline QString extractSurface(const MeCab::Node *node) -{ - const char *rawText = node->surface; - rawText -= node->rlength - node->length; - return QString::fromUtf8(rawText, node->rlength); + return terms; } -/** - * Gets the surface string without whitespace from a MeCab node. - * @param node The MeCab node to get the surface from. 
- * @return The surface string without whitespace. - */ -static inline QString extractCleanSurface(const MeCab::Node *node) +std::vector Dictionary::generateQueries(const QString &text) const { - return QString::fromUtf8(node->surface, node->length); + std::vector queries; + for (const std::unique_ptr &gen : m_generators) + { + std::vector currQueries = gen->generateQueries(text); + queries.insert( + std::end(queries), + std::make_move_iterator(std::begin(currQueries)), + std::make_move_iterator(std::end(currQueries)) + ); + } + return queries; } -/** - * Recursively generates queries and surface strings from a node. - * @param node The node to start at. Usually the next node after the BOS - * node. Is nullptr safe. - * @return A list of conjugated string and surface (raw) strings. Belongs to - * the caller. - */ -static QList *generateQueriesHelper( - const MeCab::Node *node) +void Dictionary::sortQueries(std::vector &queries) { - QList *queries = new QList; - while (node) - { - QString deconj = extractDeconjugation(node); - QString surface = extractSurface(node); - QString surfaceClean = extractCleanSurface(node); - if (deconj != "*") + std::sort( + std::begin(queries), std::end(queries), + [] (const SearchQuery &rhs, const SearchQuery &lhs) -> bool { - MeCabPair pair; - pair.deconj = deconj; - pair.surface = surface; - pair.surfaceClean = surfaceClean; - queries->append(pair); - } - - if (node->next) - { - QList *subQueries = generateQueriesHelper(node->next); - for (MeCabPair &p : *subQueries) + if (rhs.surface.size() > lhs.surface.size()) + { + return true; + } + if (rhs.surface.size() == lhs.surface.size()) { - p.deconj.prepend(surfaceClean); - p.surface.prepend(surface); - p.surfaceClean.prepend(surfaceClean); - queries->append(p); + return rhs.deconj.size() > lhs.deconj.size(); } - delete subQueries; + return false; } - - node = node->bnext; - } - return queries; + ); } -QList Dictionary::generateQueries(const QString &query) +void 
Dictionary::filterDuplicates(std::vector &queries) { - QList queries; - if (query.isEmpty() || m_tagger == nullptr) - { - return queries; - } - - /* Lemmatize the query */ - MeCab::Lattice *lattice = MeCab::createLattice(); - QByteArray queryArr = query.toUtf8(); - lattice->set_sentence(queryArr); - if (!m_tagger->parse(lattice)) - { - qDebug() << "Cannot access MeCab"; - delete lattice; - return queries; - } - - /* Generate queries */ - QList *unfiltered = - generateQueriesHelper(lattice->bos_node()->next); - QSet duplicates; - for (const MeCabPair &p : *unfiltered) - { - if (query.startsWith(p.deconj) || duplicates.contains(p.deconj)) + auto last = std::unique( + std::begin(queries), std::end(queries), + [] (const SearchQuery &a, const SearchQuery &b) -> bool { - continue; + return a.deconj == b.deconj; } - queries << p; - duplicates << p.deconj; - } - delete unfiltered; - delete lattice; - - return queries; + ); + queries.erase(last, std::end(queries)); } -/* The maximum number of queries one thread can be accountable for. 
*/ -#define QUERIES_PER_THREAD 4 - -SharedTermList Dictionary::searchTerms( - const QString query, - const QString subtitle, - const int index, - const int *currentIndex) +void Dictionary::sortTerms(SharedTermList &terms) const { - SharedTermList terms = SharedTermList(new QList); - - /* Fork worker threads for exact queries */ - QList workers; - for (QString str = query; !str.isEmpty(); str.chop(QUERIES_PER_THREAD)) - { - int endSize = str.size() - QUERIES_PER_THREAD; - if (endSize < 0) + std::sort(std::begin(*terms), std::end(*terms), + [] (const SharedTerm lhs, const SharedTerm rhs) -> bool { - endSize = 0; - } - - DictionaryWorker *worker = new ExactWorker( - str, - endSize, - subtitle, - index, - currentIndex, - *m_db - ); - - worker->start(); - workers.append(worker); - } - - /* Get lemmatized queries */ - QList queries = generateQueries(query); - if (!queries.isEmpty()) - { - for (size_t i = 0; - queries.constBegin() + i < queries.constEnd(); - i += QUERIES_PER_THREAD) - { - auto endIt = queries.constBegin() + i + QUERIES_PER_THREAD; - if (endIt > queries.constEnd()) - endIt = queries.constEnd(); - - DictionaryWorker *worker = new MeCabWorker( - queries.constBegin() + i, - endIt, - subtitle, - index, - currentIndex, - *m_db - ); - - worker->start(); - workers.append(worker); - } - } - - /* Wait for the exact thread to finish */ - for (DictionaryWorker *worker : workers) - { - worker->wait(); - terms->append(worker->terms); - delete worker; - } - - /* Sort the results by cloze length and score */ - if (index != *currentIndex) - { - return nullptr; - } - std::sort(terms->begin(), terms->end(), - [] (const SharedTerm lhs, const SharedTerm rhs) -> bool { return lhs->clozeBody.size() > rhs->clozeBody.size() || ( lhs->clozeBody.size() == rhs->clozeBody.size() && @@ -460,25 +237,21 @@ SharedTermList Dictionary::searchTerms( } ); - /* Sort internal term data */ - if (index != *currentIndex) - { - return nullptr; - } - m_dicOrder.lock.lockForRead(); for (SharedTerm 
term : *terms) { - std::sort(term->definitions.begin(), term->definitions.end(), - [=] (const TermDefinition &lhs, const TermDefinition &rhs) -> bool { + std::sort(std::begin(term->definitions), std::end(term->definitions), + [=] (const TermDefinition &lhs, const TermDefinition &rhs) -> bool + { uint32_t lhsPriority = m_dicOrder.map[lhs.dictionary]; uint32_t rhsPriority = m_dicOrder.map[rhs.dictionary]; return lhsPriority < rhsPriority || (lhsPriority == rhsPriority && lhs.score > rhs.score); } ); - std::sort(term->frequencies.begin(), term->frequencies.end(), - [=] (const Frequency &lhs, const Frequency &rhs) -> bool { + std::sort(std::begin(term->frequencies), std::end(term->frequencies), + [=] (const Frequency &lhs, const Frequency &rhs) -> bool + { return m_dicOrder.map[lhs.dictionary] < m_dicOrder.map[rhs.dictionary]; } @@ -491,12 +264,8 @@ SharedTermList Dictionary::searchTerms( } } m_dicOrder.lock.unlock(); - - return terms; } -#undef QUERIES_PER_THREAD - /* End Term Searching Methods */ /* Begin Kanji Searching Methods */ @@ -588,13 +357,14 @@ QString Dictionary::disableDictionaries(const QStringList &dictionaries) return ""; } -QStringList Dictionary::getDictionaries() +QStringList Dictionary::getDictionaries() const { QStringList dictionaries = m_db->getDictionaries(); m_dicOrder.lock.lockForRead(); std::sort(dictionaries.begin(), dictionaries.end(), - [=] (const QString &lhs, const QString &rhs) -> bool { + [=] (const QString &lhs, const QString &rhs) -> bool + { return m_dicOrder.map[lhs] < m_dicOrder.map[rhs]; } ); @@ -603,7 +373,7 @@ QStringList Dictionary::getDictionaries() return dictionaries; } -QStringList Dictionary::getDisabledDictionaries() +QStringList Dictionary::getDisabledDictionaries() const { return m_db->getDisabledDictionaries(); } @@ -611,10 +381,11 @@ QStringList Dictionary::getDisabledDictionaries() /* End Dictionary Methods */ /* Begin Helpers */ -void Dictionary::sortTags(QList &tags) +void Dictionary::sortTags(QList &tags) const { 
- std::sort(tags.begin(), tags.end(), - [] (const Tag &lhs, const Tag &rhs) -> bool { + std::sort(std::begin(tags), std::end(tags), + [] (const Tag &lhs, const Tag &rhs) -> bool + { return lhs.order < rhs.order || (lhs.order == rhs.order && lhs.score > rhs.score); } diff --git a/src/dict/dictionary.h b/src/dict/dictionary.h index 4620eaa..72e2f3b 100644 --- a/src/dict/dictionary.h +++ b/src/dict/dictionary.h @@ -21,21 +21,19 @@ #ifndef DICTIONARY_H #define DICTIONARY_H +#include + #include #include #include -#include -#include "expression.h" +#include +#include -namespace MeCab -{ - class Tagger; - class Lattice; -} +#include "expression.h" +#include "querygenerator.h" class DatabaseManager; -struct SearchPair; /** * The intended API for interacting with the database. @@ -46,7 +44,7 @@ class Dictionary : public QObject public: Dictionary(QObject *parent = nullptr); - ~Dictionary(); + virtual ~Dictionary(); /** * Searches for all terms in the query. @@ -105,13 +103,13 @@ class Dictionary : public QObject * Gets a list of dictionaries ordered by user preference. * @return A list of dictionaries ordered by user preference. */ - QStringList getDictionaries(); + QStringList getDictionaries() const; /** * Gets the list of disabled dictionaries. * @return The names of all disabled dictionaries. */ - QStringList getDisabledDictionaries(); + QStringList getDisabledDictionaries() const; private Q_SLOTS: /** @@ -119,25 +117,49 @@ private Q_SLOTS: */ void initDictionaryOrder(); + /** + * Populates the list of QueryGenerators. + */ + void initQueryGenerators(); + private: /** - * Uses MeCab to generate a list of non-exact queries. - * @param query The raw query. - * @return A list of search pairs. + * Generate queries from text. + * @param text The text to generate queries from. + * @return The list of SearchQuery. + */ + [[nodiscard]] + std::vector generateQueries(const QString &text) const; + + /** + * Sorts queries in descending order of surface length, then deconj length. 
+ * @param[out] queries The list of queries to sort. */ - QList generateQueries(const QString &query); + static void sortQueries(std::vector &queries); + + /** + * Filters out duplicates from the queries vector. + * @param[out] queries The queries to filter duplicates from. + */ + static void filterDuplicates(std::vector &queries); + + /** + * Sort the term list by priority and length. + * @param[out] terms The term list to sort. + */ + void sortTerms(SharedTermList &terms) const; /** * Sorts tag by descending order, breaking ties on ascending score. * @param[out] tags The list of tags to sort. */ - void sortTags(QList &tags); + void sortTags(QList &tags) const; - /* The DatabaseManager. */ - DatabaseManager *m_db; + /* The DatabaseManager */ + std::unique_ptr m_db; - /* The object used for interacting with MeCab. */ - MeCab::Tagger *m_tagger; + /* List of QueryGenerators */ + std::vector> m_generators; /* Contains dictionary priority information. */ struct DictOrder @@ -146,135 +168,8 @@ private Q_SLOTS: QHash map; /* Used for locking for reading and writing. */ - QReadWriteLock lock; + mutable QReadWriteLock lock; } m_dicOrder; - - /** - * Parent class of dictionary worker threads. Used to cut down on duplicated - * terms. - */ - class DictionaryWorker : public QThread - { - public: - DictionaryWorker( - const QString &subtitle, - const int index, - const int *currentIndex, - DatabaseManager &db - ) : subtitle(subtitle), - index(index), - currentIndex(currentIndex), - db(db) {} - - /* Found terms will be in this list */ - QList terms; - - protected: - /* The current subtitle */ - const QString &subtitle; - - /* The index into the subtitle */ - const int index; - - /* A reference to the current index */ - const int *currentIndex; - - /* A reference to the database */ - DatabaseManager &db; - }; - - /** - * Worker thread for querying the term database for exact substrings of the - * query. 
- */ - class ExactWorker : public Dictionary::DictionaryWorker - { - public: - /** - * Creates a worker thread for finding exact matches against the query. - * Does so by chopping off the last character of the query until - * endSize is reached. - * @param query The query to look for terms in. - * @param endSize The smallest size a query can reach (exclusive) - * before searching ceases. - * @param subtitle The subtitle the query appears in. - * @param index The index into the subtitle where the query - * begins. - * @param currentIndex A pointer to the current index. If this value is - * no different from the index before this method is - * done, the search is aborted. - * @param db The database manager. - */ - ExactWorker( - const QString &query, - const int endSize, - const QString &subtitle, - const int index, - const int *currentIndex, - DatabaseManager &db - ) : Dictionary::DictionaryWorker( - subtitle, - index, - currentIndex, - db - ), - query(query), - endSize(endSize) {} - - void run() override; - - private: - /* The query string */ - QString query; - - /* The final size of the query. When the query reaches this size, the - * thread terminates. */ - const int endSize; - }; - - /** - * Worker thread for querying the term database for MeCab generated queries. - */ - class MeCabWorker : public Dictionary::DictionaryWorker - { - public: - /** - * Creates a worker thread for finding MeCab queries. - * @param begin An iterator pointing to the first query to search - * (inclusive). - * @param end An iterator pointing to the largest query to - * search (exclusive). - * before searching ceases. - * @param subtitle The subtitle the query appears in. - * @param index The index into the subtitle where the query - * begins. - * @param currentIndex A pointer to the current index. If this value is - * no different from the index before this method is - * done, the search is aborted. - * @param db The database manager. 
- */ - MeCabWorker( - QList::const_iterator begin, - QList::const_iterator end, - const QString &subtitle, - const int index, - const int *currentIndex, - DatabaseManager &db - ) : Dictionary::DictionaryWorker( - subtitle, - index, - currentIndex, - db - ), - begin(begin), - end(end) {} - - void run() override; - - private: - /* Start and end iterators to queries */ - QList::const_iterator begin, end; - }; }; #endif // DICTIONARY_H diff --git a/src/dict/exactquerygenerator.cpp b/src/dict/exactquerygenerator.cpp new file mode 100644 index 0000000..0df5272 --- /dev/null +++ b/src/dict/exactquerygenerator.cpp @@ -0,0 +1,39 @@ +//////////////////////////////////////////////////////////////////////////////// +// +// Copyright (c) 2024 Ripose +// +// This file is part of Memento. +// +// Memento is free software: you can redistribute it and/or modify +// it under the terms of the GNU General Public License as published by +// the Free Software Foundation, version 2 of the License. +// +// Memento is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public License for more details. +// +// You should have received a copy of the GNU General Public License +// along with Memento. If not, see . 
+// +//////////////////////////////////////////////////////////////////////////////// + +#include "exactquerygenerator.h" + +std::vector ExactQueryGenerator::generateQueries( + const QString &text) const +{ + std::vector queries; + + QString query = text; + while (!query.isEmpty()) + { + SearchQuery sq; + sq.deconj = query; + sq.surface = query; + queries.emplace_back(std::move(sq)); + query.chop(1); + } + + return queries; +} diff --git a/src/dict/exactquerygenerator.h b/src/dict/exactquerygenerator.h new file mode 100644 index 0000000..10902a9 --- /dev/null +++ b/src/dict/exactquerygenerator.h @@ -0,0 +1,64 @@ +//////////////////////////////////////////////////////////////////////////////// +// +// Copyright (c) 2024 Ripose +// +// This file is part of Memento. +// +// Memento is free software: you can redistribute it and/or modify +// it under the terms of the GNU General Public License as published by +// the Free Software Foundation, version 2 of the License. +// +// Memento is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public License for more details. +// +// You should have received a copy of the GNU General Public License +// along with Memento. If not, see . +// +//////////////////////////////////////////////////////////////////////////////// + +#ifndef EXACTQUERYGENERATOR_H +#define EXACTQUERYGENERATOR_H + +#include "querygenerator.h" + +class ExactQueryGenerator final : public QueryGenerator +{ +public: + virtual ~ExactQueryGenerator() = default; + + /** + * ExactQueryGenerator is always valid. + * @return Always returns true. + */ + [[nodiscard]] + inline bool valid() const override + { + return true; + } + + /** + * Generates a list of queries from the given text. 
+ * For a query like 昨日すき焼きを食べました, return results are like + * 昨日すき焼きを食べました + * 昨日すき焼きを食べまし + * 昨日すき焼きを食べま + * 昨日すき焼きを食べ + * 昨日すき焼きを食 + * 昨日すき焼きを + * 昨日すき焼き + * 昨日すき焼 + * 昨日すき + * 昨日す + * 昨日 + * 昨 + * @param text The text to turn into queries + * @return The list of generated queries + */ + [[nodiscard]] + std::vector generateQueries( + const QString &text) const override; +}; + +#endif // EXACTQUERYGENERATOR_H diff --git a/src/dict/mecabquerygenerator.cpp b/src/dict/mecabquerygenerator.cpp new file mode 100644 index 0000000..fd31f90 --- /dev/null +++ b/src/dict/mecabquerygenerator.cpp @@ -0,0 +1,199 @@ +//////////////////////////////////////////////////////////////////////////////// +// +// Copyright (c) 2024 Ripose +// +// This file is part of Memento. +// +// Memento is free software: you can redistribute it and/or modify +// it under the terms of the GNU General Public License as published by +// the Free Software Foundation, version 2 of the License. +// +// Memento is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public License for more details. +// +// You should have received a copy of the GNU General Public License +// along with Memento. If not, see . +// +//////////////////////////////////////////////////////////////////////////////// + +#include "mecabquerygenerator.h" + +#include +#include + +#include "util/utils.h" + +/* Begin Static Helpers */ + +#if defined(Q_OS_WIN) +/** + * This whole section is necessary on Windows because MeCab has a bug that + * prevents it from loading dictionaries if there are spaces in the path on + * Windows. If Memento is to be installed in "Program Files", this quickly + * becomes an issue. This workaround turns all long paths into space-less + * short paths. + */ +#define NOMINMAX +#include + +#include + +/** + * Takes a Windows long path and returns an 8.3/short path. 
+ * @param path The Window long path to convert. + * @return A Windows short path, or the empty string on error. + */ +static QByteArray toWindowsShortPath(const QString &path) +{ + QByteArray pathArr = path.toUtf8(); + DWORD length = 0; + + length = GetShortPathNameA(pathArr.constData(), NULL, 0); + if (length == 0) + { + return ""; + } + + QByteArray buf(length, '\0'); + length = GetShortPathNameA(pathArr, buf.data(), length); + if (length == 0) + { + return ""; + } + buf.chop(1); + return buf; +} + +/** + * Generates the MeCab argument on Windows. + * @return An argument to pass MeCab so it uses the install's ipadic. + */ +static QByteArray genMecabArg() +{ + QByteArray arg = "-r "; + arg += toWindowsShortPath( + DirectoryUtils::getDictionaryDir() + "ipadic" + SLASH + "dicrc" + ); + arg += " -d "; + arg += toWindowsShortPath(DirectoryUtils::getDictionaryDir() + "ipadic"); + return arg; +} +#endif + +/* End Static Helpers */ +/* Begin Constructor */ + +MeCabQueryGenerator::MeCabQueryGenerator() +{ +#if defined(Q_OS_WIN) + QByteArray mecabArg = genMecabArg(); +#elif defined(APPIMAGE) || defined(APPBUNDLE) + QByteArray mecabArg = ( \ + "-r " + DirectoryUtils::getDictionaryDir() + "ipadic" + SLASH + "dicrc " \ + "-d " + DirectoryUtils::getDictionaryDir() + "ipadic" \ + ).toUtf8(); +#else + QByteArray mecabArg = ""; +#endif + m_tagger.reset(MeCab::createTagger(mecabArg)); + if (m_tagger == nullptr) + { + qDebug() << MeCab::getTaggerError(); + } +} + +/* End Constructor */ +/* Begin Query Generator */ + +std::vector MeCabQueryGenerator::generateQueries( + const QString &text) const +{ + if (!valid() || text.isEmpty()) + { + return {}; + } + + std::unique_ptr lattice(MeCab::createLattice()); + QByteArray textArr = text.toUtf8(); + lattice->set_sentence(textArr); + if (!m_tagger->parse(lattice.get())) + { + qDebug() << "Cannot access MeCab"; + qDebug() << MeCab::getLastError(); + return {}; + } + std::vector mecabQueries = + 
generateQueriesHelper(lattice->bos_node()->next); + + std::vector queries; + queries.reserve(mecabQueries.size()); + std::copy( + std::begin(mecabQueries), std::end(mecabQueries), + std::back_inserter(queries) + ); + return queries; +} + +std::vector +MeCabQueryGenerator::generateQueriesHelper(const MeCab::Node *node) +{ + std::vector queries; + while (node) + { + QString deconj = extractDeconjugation(node); + QString surface = extractSurface(node); + QString surfaceClean = extractCleanSurface(node); + if (deconj != "*") + { + MeCabQuery query; + query.deconj = deconj; + query.surface = surface; + query.surfaceClean = surfaceClean; + queries.emplace_back(std::move(query)); + } + + if (node->next) + { + std::vector subQueries = + generateQueriesHelper(node->next); + for (MeCabQuery &p : subQueries) + { + p.deconj.prepend(surfaceClean); + p.surface.prepend(surface); + p.surfaceClean.prepend(surfaceClean); + queries.emplace_back(std::move(p)); + } + } + + node = node->bnext; + } + return queries; +} + +inline QString MeCabQueryGenerator::extractDeconjugation( + const MeCab::Node *node) +{ + constexpr int WORD_INDEX{6}; + QStringList features = QString::fromUtf8(node->feature).split(','); + if (features.size() <= WORD_INDEX) + { + return ""; + } + return features[WORD_INDEX]; +} + +inline QString MeCabQueryGenerator::extractSurface(const MeCab::Node *node) +{ + const char *rawText = node->surface; + rawText -= node->rlength - node->length; + return QString::fromUtf8(rawText, node->rlength); +} + +inline QString MeCabQueryGenerator::extractCleanSurface(const MeCab::Node *node) +{ + return QString::fromUtf8(node->surface, node->length); +} + +/* End Query Generator */ diff --git a/src/dict/mecabquerygenerator.h b/src/dict/mecabquerygenerator.h new file mode 100644 index 0000000..16eb0fa --- /dev/null +++ b/src/dict/mecabquerygenerator.h @@ -0,0 +1,108 @@ +//////////////////////////////////////////////////////////////////////////////// +// +// Copyright (c) 2024 Ripose 
+// +// This file is part of Memento. +// +// Memento is free software: you can redistribute it and/or modify +// it under the terms of the GNU General Public License as published by +// the Free Software Foundation, version 2 of the License. +// +// Memento is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public License for more details. +// +// You should have received a copy of the GNU General Public License +// along with Memento. If not, see . +// +//////////////////////////////////////////////////////////////////////////////// + +#ifndef MECABQUERYGENERATOR_H +#define MECABQUERYGENERATOR_H + +#include "querygenerator.h" + +#include + +#include + +/** + * Query generator that uses MeCab backed by ipadic to deconjugate text. + */ +class MeCabQueryGenerator final : public QueryGenerator +{ +public: + MeCabQueryGenerator(); + virtual ~MeCabQueryGenerator() = default; + + /** + * Returns if the query generator is valid. + * @return true if the generator is valid, + * @return false otherwise. + */ + [[nodiscard]] + inline bool valid() const override + { + return m_tagger != nullptr; + } + + /** + * Generates queries from a given text string. + * @param text The text string to generate queries from. + * @return The list of generated queries. + */ + [[nodiscard]] + std::vector generateQueries( + const QString &text) const override; + +private: + /** + * A special SearchPair that contains additional information needed by + * MeCab. + */ + struct MeCabQuery : public SearchQuery + { + /* The surface string without whitespace */ + QString surfaceClean; + }; + + /** + * Recursively generates queries and surface strings from a node. + * @param node The node to start at. Usually the next node after the BOS + * node. Is nullptr safe. + * @return A list of conjugated string and surface (raw) strings. 
+ */ + [[nodiscard]] + static std::vector generateQueriesHelper( + const MeCab::Node *node); + + /** + * Gets the deconjugated word from a MeCab node. + * @param node The node to get the deconjugation from. + * @return The deconjugated word, * if there was an error. + */ + [[nodiscard]] + static inline QString extractDeconjugation(const MeCab::Node *node); + + /** + * Gets the surface string including whitespace from a MeCab node. + * @param node The MeCab node to get the surface from. + * @return The surface string including whitespace. + */ + [[nodiscard]] + static inline QString extractSurface(const MeCab::Node *node); + + /** + * Gets the surface string without whitespace from a MeCab node. + * @param node The MeCab node to get the surface from. + * @return The surface string without whitespace. + */ + [[nodiscard]] + static inline QString extractCleanSurface(const MeCab::Node *node); + + /* The object used for interacting with MeCab */ + std::unique_ptr m_tagger{nullptr}; +}; + +#endif // MECABQUERYGENERATOR_H diff --git a/src/dict/querygenerator.h b/src/dict/querygenerator.h new file mode 100644 index 0000000..7dfc2c7 --- /dev/null +++ b/src/dict/querygenerator.h @@ -0,0 +1,66 @@ +//////////////////////////////////////////////////////////////////////////////// +// +// Copyright (c) 2024 Ripose +// +// This file is part of Memento. +// +// Memento is free software: you can redistribute it and/or modify +// it under the terms of the GNU General Public License as published by +// the Free Software Foundation, version 2 of the License. +// +// Memento is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public License for more details. +// +// You should have received a copy of the GNU General Public License +// along with Memento. If not, see . 
+// +//////////////////////////////////////////////////////////////////////////////// + +#ifndef QUERYGENERATOR_H +#define QUERYGENERATOR_H + +#include + +#include "searchquery.h" + +/** + * Interface class for generating search queries from text. + */ +class QueryGenerator +{ +public: + QueryGenerator() = default; + virtual ~QueryGenerator() = default; + + /** + * Returns if the generator is valid. + * @return true if the generator is valid, + * @return false otherwise + */ + [[nodiscard]] + virtual bool valid() const = 0; + + /** + * Returns if the generator is invalid + * @return true if the generator is invalid, + * @return false otherwise + */ + [[nodiscard]] + inline bool operator!() const + { + return !valid(); + } + + /** + * Generate a list of queries from a string of text. + * @param text The text to extract queries from. + * @return A list of search queries. + */ + [[nodiscard]] + virtual std::vector generateQueries( + const QString &text) const = 0; +}; + +#endif // QUERYGENERATOR_H diff --git a/src/dict/searchquery.h b/src/dict/searchquery.h new file mode 100644 index 0000000..b908138 --- /dev/null +++ b/src/dict/searchquery.h @@ -0,0 +1,39 @@ +//////////////////////////////////////////////////////////////////////////////// +// +// Copyright (c) 2024 Ripose +// +// This file is part of Memento. +// +// Memento is free software: you can redistribute it and/or modify +// it under the terms of the GNU General Public License as published by +// the Free Software Foundation, version 2 of the License. +// +// Memento is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public License for more details. +// +// You should have received a copy of the GNU General Public License +// along with Memento. If not, see . 
+// +//////////////////////////////////////////////////////////////////////////////// + +#ifndef SEARCHQUERY_H +#define SEARCHQUERY_H + +#include + +/** + * A pair to search for. The deconjugated string is used for querying the + * database and the surface string is used for cloze generation. + */ +struct SearchQuery +{ + /* The deconjugated string */ + QString deconj; + + /* The raw conjugated string */ + QString surface; +}; + +#endif // SEARCHQUERY_H diff --git a/src/gui/widgets/mpv/mpvwidget.cpp b/src/gui/widgets/mpv/mpvwidget.cpp index 9430ccb..e0aef8a 100644 --- a/src/gui/widgets/mpv/mpvwidget.cpp +++ b/src/gui/widgets/mpv/mpvwidget.cpp @@ -29,6 +29,8 @@ #include #if defined(Q_OS_WIN) +#define NOMINMAX +#include #include #elif defined(Q_OS_UNIX) && !defined(Q_OS_DARWIN) #include