From b1d7c60b133ea56838f88c438b1ba75f522beab5 Mon Sep 17 00:00:00 2001 From: Weng Xuetian Date: Fri, 19 Dec 2025 06:37:10 -0800 Subject: [PATCH] Add some additional API for context words and only keep words based on max lm order --- src/libime/core/languagemodel.cpp | 2 ++ src/libime/core/languagemodel.h | 2 ++ src/libime/pinyin/pinyincontext.cpp | 33 +++++++++++++++++++++++++++-- src/libime/pinyin/pinyincontext.h | 20 +++++++++++++++++ test/testpinyincontext.cpp | 12 ++++++++++- 5 files changed, 66 insertions(+), 3 deletions(-) diff --git a/src/libime/core/languagemodel.cpp b/src/libime/core/languagemodel.cpp index 50f3b42..72cc204 100644 --- a/src/libime/core/languagemodel.cpp +++ b/src/libime/core/languagemodel.cpp @@ -148,6 +148,8 @@ LanguageModel::LanguageModel( LanguageModel::~LanguageModel() {} +size_t LanguageModel::maxOrder() { return KENLM_MAX_ORDER; } + std::shared_ptr LanguageModel::languageModelFile() const { FCITX_D(); diff --git a/src/libime/core/languagemodel.h b/src/libime/core/languagemodel.h index 6035091..d16960e 100644 --- a/src/libime/core/languagemodel.h +++ b/src/libime/core/languagemodel.h @@ -73,6 +73,8 @@ class LIBIMECORE_EXPORT LanguageModel : public LanguageModelBase { std::shared_ptr file = nullptr); virtual ~LanguageModel(); + static size_t maxOrder(); + std::shared_ptr languageModelFile() const; WordIndex beginSentence() const override; diff --git a/src/libime/pinyin/pinyincontext.cpp b/src/libime/pinyin/pinyincontext.cpp index edc9d3c..605f8d7 100644 --- a/src/libime/pinyin/pinyincontext.cpp +++ b/src/libime/pinyin/pinyincontext.cpp @@ -10,6 +10,7 @@ #include #include #include +#include #include #include #include @@ -81,7 +82,7 @@ class PinyinContextPrivate : public fcitx::QPtrHolder { mutable std::vector candidatesToCursor_; mutable std::unordered_set candidatesToCursorSet_; std::vector conn_; - std::vector contextWords_; + std::list contextWords_; size_t alignCursorToNextSegment() const { FCITX_Q(); @@ -988,10 +989,38 @@ void PinyinContext::setContextWords( const std::vector &contextWords) { FCITX_D(); d->contextWords_.clear(); - for (const auto &word : contextWords) { + appendContextWords(contextWords); +} + +void PinyinContext::clearContextWords() { + FCITX_D(); + d->contextWords_.clear(); +} + +void PinyinContext::appendContextWords( + const std::vector &contextWords) { + FCITX_D(); + + size_t needed = LanguageModel::maxOrder() - 1; + + for (const auto &word : + std::span{contextWords}.last(std::min(contextWords.size(), needed))) { d->contextWords_.push_back( WordNode(word, d->ime_->model()->index(word))); } + while (d->contextWords_.size() > needed) { + d->contextWords_.pop_front(); + } +} + +std::vector PinyinContext::contextWords() const { + FCITX_D(); + std::vector words; + words.reserve(d->contextWords_.size()); + for (const auto &word : d->contextWords_) { + words.push_back(word.word()); + } + return words; } bool PinyinContext::learnWord() { return false; } diff --git a/src/libime/pinyin/pinyincontext.h b/src/libime/pinyin/pinyincontext.h index fec543a..c877ac2 100644 --- a/src/libime/pinyin/pinyincontext.h +++ b/src/libime/pinyin/pinyincontext.h @@ -144,6 +144,26 @@ class LIBIMEPINYIN_EXPORT PinyinContext : public InputBuffer { */ void setContextWords(const std::vector &contextWords); + /** + * Clear context words. + * @since 1.1.13 + */ + void clearContextWords(); + + /** + * Append context words for better prediction. + * @param contextWords The context words. + * @since 1.1.13 + */ + void appendContextWords(const std::vector &contextWords); + + /** + * Get context words for better prediction. + * @return current context words + * @since 1.1.13 + */ + std::vector contextWords() const; + protected: bool typeImpl(const char *s, size_t length) override; diff --git a/test/testpinyincontext.cpp b/test/testpinyincontext.cpp index ed63cf3..e1205a3 100644 --- a/test/testpinyincontext.cpp +++ b/test/testpinyincontext.cpp @@ -10,7 +10,9 @@ #include #include #include +#include #include +#include #include #include "libime/core/historybigram.h" #include "libime/core/lattice.h" @@ -262,7 +264,15 @@ int main() { } { c.clear(); - c.setContextWords({"他", "爱"}); + c.setContextWords({"我", "不", "知道"}); + FCITX_ASSERT(c.contextWords() == + std::vector{"不", "知道"}); + c.setContextWords({"谁", "他"}); + FCITX_ASSERT(c.contextWords() == + std::vector{"谁", "他"}); + c.appendContextWords({"爱"}); + FCITX_ASSERT(c.contextWords() == + std::vector{"他", "爱"}); c.type("ta"); size_t i = 0; for (const auto &candidate : c.candidatesToCursor()) {