[elpa] externals/pyim 9328e361b1 2/3: Add pyim-candidates-get-chief and use it.

emacs-elpa-diffs

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[elpa] externals/pyim 9328e361b1 2/3: Add pyim-candidates-get-chief and use it.

From:	ELPA Syncer
Subject:	[elpa] externals/pyim 9328e361b1 2/3: Add pyim-candidates-get-chief and use it.
Date:	Fri, 31 Dec 2021 22:57:46 -0500 (EST)

branch: externals/pyim
commit 9328e361b1529d8c02abc0f37cc5da5b1935bcfe
Author: Feng Shu <tumashu@163.com>
Commit: Feng Shu <tumashu@163.com>

    Add pyim-candidates-get-chief and use it.
    
        * pyim.el (pyim-select-word:pinyin): Simplify.
    
            * pyim-candidates.el (pyim-candidates-possible-chiefs): New variable.
            (pyim-candidates-add-possible-chief, pyim-candidates-get-chief): New functions.
            (pyim-candidates-create:xingma, pyim-candidates-create-quanpin): Use pyim-candidates-get-chief.
---
 pyim-candidates.el | 50 ++++++++++++++++++++++++++++++++++++++++++++------
 pyim-process.el    |  1 +
 pyim.el            | 15 +++------------
 3 files changed, 48 insertions(+), 18 deletions(-)

diff --git a/pyim-candidates.el b/pyim-candidates.el
index 2f28e8888c..ba5ca50d33 100644
--- a/pyim-candidates.el
+++ b/pyim-candidates.el
@@ -52,6 +52,9 @@
 
 细节信息请参考 `pyim-page-refresh' 的 docstring.")
 
+(defvar pyim-candidates-possible-chiefs nil
+  "可能做第一位候选词的词条列表。")
+
 (pyim-register-local-variables
  '(pyim-candidates pyim-candidate-position))
 
@@ -69,6 +72,38 @@ IMOBJS 获得候选词条。"
         (funcall (intern (format "pyim-candidates-create:%S" class))
                  imobjs scheme-name async)))))
 
+(defun pyim-candidates-add-possible-chief (word)
+  "将 WORD 添加到 `pyim-candidates-possible-chiefs'."
+  (push word pyim-candidates-possible-chiefs)
+  (setq pyim-candidates-possible-chiefs
+        (cl-subseq pyim-candidates-possible-chiefs 0
+                   (min 100 (length pyim-candidates-possible-chiefs)))))
+
+(defun pyim-candidates-get-chief (scheme-name &optional personal-words common-words)
+  "选取第一位候选词。"
+  (let* ((class (pyim-scheme-get-option scheme-name :class))
+         (words pyim-candidates-possible-chiefs)
+         (length (length words))
+         ;; NOTE: 网上传言，一段话平均70个字，按照一个词两个字估算，100个词大概
+         ;; 为两段话。
+         (words100 (cl-subseq words 0 (min 100 length)))
+         ;; NOTE: 10个词大概1句话。
+         (words10 (cl-subseq words 0 (min 10 length))))
+    (if (equal class 'xingma)
+        ;; 形码输入法选择从词库里面获取到的第一个词条。
+        (car common-words)
+      (or
+       ;; 最近输入的10个词中出现一次以上。
+       (cl-find-if (lambda (word)
+                     (> (cl-count word words10 :test #'equal) 1))
+                   personal-words)
+       ;; 最近输入的100个词中出现过三次以上。
+       (cl-find-if (lambda (word)
+                     (> (cl-count word words100 :test #'equal) 3))
+                   personal-words)
+       ;; 个人词条中的第一个词。
+       (car personal-words)))))
+
 (defun pyim-candidates-create:xingma (imobjs scheme-name &optional async)
   "`pyim-candidates-create' 处理五笔仓颉等形码输入法的函数."
   (unless async
@@ -98,7 +133,9 @@ IMOBJS 获得候选词条。"
                 ;; NOTE: 下面这种策略是否合理？
                 ;; 1. 第一个词选择公共词库中的第一个词。
                 ;; 2. 剩下的分成常用字和词，常用字优先排，字和词各按 count 大小排序。
-                (let* ((first-word (car (pyim-dcache-get last-code '(code2word))))
+                (let* ((personal-words (pyim-dcache-get last-code '(icode2word)))
+                       (common-words (pyim-dcache-get last-code '(code2word)))
+                       (chief-word (pyim-candidates-get-chief scheme-name personal-words common-words))
                        (chars (cl-remove-if (lambda (word)
                                               ;; NOTE: 常用字在这里的定义是用户输入次数超过30次的汉字，30这个数字的选取是非常主观的，也许有
                                               ;; 更合理的取值。
@@ -169,7 +206,7 @@ IMOBJS 获得候选词条。"
 
 (defun pyim-candidates-create-quanpin (imobjs scheme-name &optional fast-search)
   "`pyim-candidates-create:quanpin' 内部使用的函数。"
-  (let (jianpin-words znabc-words personal-words common-words pinyin-chars-1 pinyin-chars-2)
+  (let (jianpin-words znabc-words personal-words common-words pinyin-chars-1 pinyin-chars-2 chief-word)
     ;; 智能ABC模式，得到尽可能的拼音组合，查询这些组合，得到的词条做为联想词。
     (let ((codes (mapcar (lambda (x)
                            (pyim-subconcat x "-"))
@@ -250,13 +287,13 @@ IMOBJS 获得候选词条。"
     ;; 个人词条排序：使用词频信息对个人词库得到的候选词排序，第一个词条的位置
     ;; 比较特殊，不参与排序，具体原因请参考 `pyim-page-select-word' 中的
     ;; comment.
-    (setq personal-words
-          `(,(car personal-words)
-            ,@(pyim-candidates-sort (cdr personal-words))))
+    (setq personal-words (pyim-candidates-sort personal-words))
+    (setq chief-word (pyim-candidates-get-chief scheme-name personal-words))
 
     ;; 调试输出
     (when pyim-debug
       (print (list :imobjs imobjs
+                   :chief-word chief-word
                    :personal-words personal-words
                    :common-words common-words
                    :jianpin-words jianpin-words
@@ -270,7 +307,8 @@ IMOBJS 获得候选词条。"
 
     (delete-dups
      (delq nil
-           `(,@personal-words
+           `(,chief-word
+             ,@personal-words
              ,@jianpin-words
              ,@common-words
              ,@znabc-words
diff --git a/pyim-process.el b/pyim-process.el
index bc5c0d5284..fe748978d4 100644
--- a/pyim-process.el
+++ b/pyim-process.el
@@ -572,6 +572,7 @@ BUG：拼音无法有效地处理多音字。"
            (codes (pyim-cstring-to-codes
                    word scheme-name
                    (or criteria pyim-cstring-to-code-criteria))))
+      (pyim-candidates-add-possible-chief word)
       ;; 保存对应词条的词频
       (when (> (length word) 0)
         (pyim-dcache-update-wordcount word (or wordcount-handler #'1+)))
diff --git a/pyim.el b/pyim.el
index 283789befd..b86ee25f66 100644
--- a/pyim.el
+++ b/pyim.el
@@ -562,18 +562,9 @@ FILE 的格式与 `pyim-dcache-export' 生成的文件格式相同，
             ;; 择更加好用。
             (goto-char (pyim-process-next-imelem-position 20 t 1)))
           (pyim-process-run))
-      ;; pyim 词频调整策略：
-      ;; 1. 如果一个词条是用户在输入过程中，自己新建的词条，那么就将这个词条
-      ;;    添加到个人词库的后面（不放置前面是为了减少误输词条的影响）。
-      ;; 2. 如果输入的词条，先前已经在候选词列表中，就自动将其放到第一位。
-      ;;    这样的话，一个新词要输入两遍之后才可能出现在第一位。
-      ;; 3. pyim 在启动的时候，会使用词频信息，对个人词库作一次排序。
-      ;;    用作 pyim 下一次使用。
-      (unless (pyim-process-select-subword-p) ;NOTE: 以词定字的时候，到底应不应该保存词条呢，需要进一步研究。
-        (if (member (pyim-process-get-outcome) (pyim-process-get-candidates))
-            (pyim-process-create-word (pyim-process-get-outcome) t)
-          (pyim-process-create-word (pyim-process-get-outcome))))
-
+      ;;NOTE: 以词定字的时候，到底应不应该保存词条呢，需要进一步研究。
+      (unless (pyim-process-select-subword-p)
+        (pyim-process-create-word (pyim-process-get-outcome) t))
       (pyim-process-terminate)
       ;; pyim 使用这个 hook 来处理联想词。
       (run-hooks 'pyim-select-finish-hook))))

[Prev in Thread]

Current Thread

[Next in Thread]

[elpa] externals/pyim updated (3caad23478 -> 143f13ca7d), ELPA Syncer, 2021/12/31
- [elpa] externals/pyim abcaad950e 1/3: Add pyim-candidates-sort and use it., ELPA Syncer, 2021/12/31
- [elpa] externals/pyim 143f13ca7d 3/3: pyim-process-create-word: handle subword., ELPA Syncer, 2021/12/31
- [elpa] externals/pyim 9328e361b1 2/3: Add pyim-candidates-get-chief and use it., ELPA Syncer <=

Prev by Date: [elpa] externals/pyim 143f13ca7d 3/3: pyim-process-create-word: handle subword.
Next by Date: [elpa] elpa-admin da4fd19497: New environment variable ELPA_DEBUG
Previous by thread: [elpa] externals/pyim 143f13ca7d 3/3: pyim-process-create-word: handle subword.
Next by thread: [elpa] elpa-admin da4fd19497: New environment variable ELPA_DEBUG
Index(es):
- Date
- Thread