[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]
[elpa] externals/pyim 9328e361b1 2/3: Add pyim-candidates-get-chief and use it.
From: ELPA Syncer
Subject: [elpa] externals/pyim 9328e361b1 2/3: Add pyim-candidates-get-chief and use it.
Date: Fri, 31 Dec 2021 22:57:46 -0500 (EST)
branch: externals/pyim
commit 9328e361b1529d8c02abc0f37cc5da5b1935bcfe
Author: Feng Shu <tumashu@163.com>
Commit: Feng Shu <tumashu@163.com>
Add pyim-candidates-get-chief and use it.
* pyim.el (pyim-select-word:pinyin): Simplify.
* pyim-candidates.el (pyim-candidates-possible-chiefs): New
variable.
(pyim-candidates-add-possible-chief, pyim-candidates-get-chief):
New functions.
(pyim-candidates-create:xingma, pyim-candidates-create-quanpin):
Use pyim-candidates-get-chief.
---
pyim-candidates.el | 50 ++++++++++++++++++++++++++++++++++++++++++++------
pyim-process.el | 1 +
pyim.el | 15 +++------------
3 files changed, 48 insertions(+), 18 deletions(-)
diff --git a/pyim-candidates.el b/pyim-candidates.el
index 2f28e8888c..ba5ca50d33 100644
--- a/pyim-candidates.el
+++ b/pyim-candidates.el
@@ -52,6 +52,9 @@
细节信息请参考 `pyim-page-refresh' 的 docstring.")
+(defvar pyim-candidates-possible-chiefs nil
+ "可能做第一位候选词的词条列表。")
+
(pyim-register-local-variables
'(pyim-candidates pyim-candidate-position))
@@ -69,6 +72,38 @@ IMOBJS 获得候选词条。"
(funcall (intern (format "pyim-candidates-create:%S" class))
imobjs scheme-name async)))))
+(defun pyim-candidates-add-possible-chief (word)
+ "将 WORD 添加到 `pyim-candidates-possible-chiefs'."
+ (push word pyim-candidates-possible-chiefs)
+ (setq pyim-candidates-possible-chiefs
+ (cl-subseq pyim-candidates-possible-chiefs 0
+ (min 100 (length pyim-candidates-possible-chiefs)))))
+
+(defun pyim-candidates-get-chief (scheme-name &optional personal-words common-words)
+ "选取第一位候选词。"
+ (let* ((class (pyim-scheme-get-option scheme-name :class))
+ (words pyim-candidates-possible-chiefs)
+ (length (length words))
+ ;; NOTE: 网上传言,一段话平均70个字,按照一个词两个字估算,100个词大概
+ ;; 为两段话。
+ (words100 (cl-subseq words 0 (min 100 length)))
+ ;; NOTE: 10个词大概1句话。
+ (words10 (cl-subseq words 0 (min 10 length))))
+ (if (equal class 'xingma)
+ ;; 形码输入法选择从词库里面获取到的第一个词条。
+ (car common-words)
+ (or
+ ;; 最近输入的10个词中出现一次以上。
+ (cl-find-if (lambda (word)
+ (> (cl-count word words10 :test #'equal) 1))
+ personal-words)
+ ;; 最近输入的100个词中出现过三次以上。
+ (cl-find-if (lambda (word)
+ (> (cl-count word words100 :test #'equal) 3))
+ personal-words)
+ ;; 个人词条中的第一个词。
+ (car personal-words)))))
+
(defun pyim-candidates-create:xingma (imobjs scheme-name &optional async)
"`pyim-candidates-create' 处理五笔仓颉等形码输入法的函数."
(unless async
@@ -98,7 +133,9 @@ IMOBJS 获得候选词条。"
;; NOTE: 下面这种策略是否合理?
;; 1. 第一个词选择公共词库中的第一个词。
;; 2. 剩下的分成常用字和词,常用字优先排,字和词各按 count 大小排序。
- (let* ((first-word (car (pyim-dcache-get last-code '(code2word))))
+ (let* ((personal-words (pyim-dcache-get last-code '(icode2word)))
+ (common-words (pyim-dcache-get last-code '(code2word)))
+ (chief-word (pyim-candidates-get-chief scheme-name personal-words common-words))
(chars (cl-remove-if (lambda (word)
;; NOTE: 常用字在这里的定义是用户输入次数超过30次的汉字,30这个数字的选取是非常主观的,也许有
;; 更合理的取值。
@@ -169,7 +206,7 @@ IMOBJS 获得候选词条。"
(defun pyim-candidates-create-quanpin (imobjs scheme-name &optional fast-search)
"`pyim-candidates-create:quanpin' 内部使用的函数。"
- (let (jianpin-words znabc-words personal-words common-words pinyin-chars-1 pinyin-chars-2)
+ (let (jianpin-words znabc-words personal-words common-words pinyin-chars-1 pinyin-chars-2 chief-word)
;; 智能ABC模式,得到尽可能的拼音组合,查询这些组合,得到的词条做为联想词。
(let ((codes (mapcar (lambda (x)
(pyim-subconcat x "-"))
@@ -250,13 +287,13 @@ IMOBJS 获得候选词条。"
;; 个人词条排序:使用词频信息对个人词库得到的候选词排序,第一个词条的位置
;; 比较特殊,不参与排序,具体原因请参考 `pyim-page-select-word' 中的
;; comment.
- (setq personal-words
- `(,(car personal-words)
- ,@(pyim-candidates-sort (cdr personal-words))))
+ (setq personal-words (pyim-candidates-sort personal-words))
+ (setq chief-word (pyim-candidates-get-chief scheme-name personal-words))
;; 调试输出
(when pyim-debug
(print (list :imobjs imobjs
+ :chief-word chief-word
:personal-words personal-words
:common-words common-words
:jianpin-words jianpin-words
@@ -270,7 +307,8 @@ IMOBJS 获得候选词条。"
(delete-dups
(delq nil
- `(,@personal-words
+ `(,chief-word
+ ,@personal-words
,@jianpin-words
,@common-words
,@znabc-words
diff --git a/pyim-process.el b/pyim-process.el
index bc5c0d5284..fe748978d4 100644
--- a/pyim-process.el
+++ b/pyim-process.el
@@ -572,6 +572,7 @@ BUG:拼音无法有效地处理多音字。"
(codes (pyim-cstring-to-codes
word scheme-name
(or criteria pyim-cstring-to-code-criteria))))
+ (pyim-candidates-add-possible-chief word)
;; 保存对应词条的词频
(when (> (length word) 0)
(pyim-dcache-update-wordcount word (or wordcount-handler #'1+)))
diff --git a/pyim.el b/pyim.el
index 283789befd..b86ee25f66 100644
--- a/pyim.el
+++ b/pyim.el
@@ -562,18 +562,9 @@ FILE 的格式与 `pyim-dcache-export' 生成的文件格式相同,
;; 择更加好用。
(goto-char (pyim-process-next-imelem-position 20 t 1)))
(pyim-process-run))
- ;; pyim 词频调整策略:
- ;; 1. 如果一个词条是用户在输入过程中,自己新建的词条,那么就将这个词条
- ;; 添加到个人词库的后面(不放置前面是为了减少误输词条的影响)。
- ;; 2. 如果输入的词条,先前已经在候选词列表中,就自动将其放到第一位。
- ;; 这样的话,一个新词要输入两遍之后才可能出现在第一位。
- ;; 3. pyim 在启动的时候,会使用词频信息,对个人词库作一次排序。
- ;; 用作 pyim 下一次使用。
- (unless (pyim-process-select-subword-p) ;NOTE: 以词定字的时候,到底应不应该保存词条呢,需要进一步研究。
- (if (member (pyim-process-get-outcome) (pyim-process-get-candidates))
- (pyim-process-create-word (pyim-process-get-outcome) t)
- (pyim-process-create-word (pyim-process-get-outcome))))
-
+ ;;NOTE: 以词定字的时候,到底应不应该保存词条呢,需要进一步研究。
+ (unless (pyim-process-select-subword-p)
+ (pyim-process-create-word (pyim-process-get-outcome) t))
(pyim-process-terminate)
;; pyim 使用这个 hook 来处理联想词。
(run-hooks 'pyim-select-finish-hook))))